@@ -96,11 +96,6 @@ func New(ctx context.Context, config PrecisePrefixCachePluginConfig) (*PrecisePr
9696 pool := kvevents .NewPool (config .KVEventsConfig , kvCacheIndexer .KVBlockIndex ())
9797 pool .Start (ctx )
9898
99- chatTemplateRenderer := preprocessing .NewChatTemplatingProcessor ()
100- if err := chatTemplateRenderer .Initialize (); err != nil {
101- return nil , fmt .Errorf ("failed to initialize chat templating processor: %w" , err )
102- }
103-
10499 return & PrecisePrefixCacheScorer {
105100 typedName : plugins.TypedName {Type : PrecisePrefixCachePluginType },
106101 kvCacheIndexer : kvCacheIndexer ,
@@ -139,10 +134,9 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat
139134 return nil
140135 }
141136
142- // Extract the flattened scores from the request
143137 scores , err := s .getScores (ctx , request )
144138 if err != nil {
145- logger .Error (err , "Failed to extract scores from request " )
139+ logger .Error (err , "Failed to get pod scores " )
146140 return nil
147141 }
148142 debugLogger .Info ("Got pod scores" , "scores" , scores )
@@ -159,17 +153,17 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat
159153 return indexedScoresToNormalizedScoredPods (pods , podToKey , scores )
160154}
161155
162- // extractPrompt extracts the flattened prompt from the request.
163- // For chat completions, it renders the messages using the model's chat template.
164- // For regular completions, it uses the prompt directly.
156+ // getScores retrieves the pod scores from the KV-cache indexer
157+ // based on the provided LLM request.
158+ // For chat-completions requests, the conversation messages (with any tools and documents) are passed to the indexer.
159+ // For regular completions requests, the prompt is used directly.
165160func (s * PrecisePrefixCacheScorer ) getScores (ctx context.Context , request * types.LLMRequest ) (map [string ]float64 , error ) {
166161 logger := log .FromContext (ctx ).WithName (s .typedName .String ())
167162 traceLogger := logger .V (logutil .TRACE )
168163
169164 traceLogger .Info ("Getting scores" ,
170- "target_model" , request .TargetModel ,
171- "has_chat_completions" , request .Body != nil && request .Body .ChatCompletions != nil ,
172- "has_completions" , request .Body != nil && request .Body .Completions != nil )
165+ "isChatCompletions" , request .Body != nil && request .Body .ChatCompletions != nil ,
166+ "isCompletions" , request .Body != nil && request .Body .Completions != nil )
173167
174168 // The upstream parser guarantees exactly one body is populated, but we defensively prioritize chat completions.
175169 // If an unexpected dual payload slips through (parser regression/new client), log it and use chat semantics.
@@ -198,10 +192,9 @@ func (s *PrecisePrefixCacheScorer) getScores(ctx context.Context, request *types
198192 }
199193
200194 traceLogger .Info ("Processing chat completion request" ,
201- "messages_count" , len (renderReq .Conversations ),
202- "tools_count" , len (renderReq .Tools ),
203- "documents_count" , len (renderReq .Documents ),
204- "target_model" , request .TargetModel )
195+ "messagesCount" , len (renderReq .Conversations ),
196+ "toolsCount" , len (renderReq .Tools ),
197+ "documentsCount" , len (renderReq .Documents ))
205198
206199 scores , err := s .kvCacheIndexer .GetPodScores (ctx , renderReq , "" , request .TargetModel , nil )
207200 if err != nil {
0 commit comments