@@ -87,9 +87,15 @@ func New(ctx context.Context, config PrecisePrefixCachePluginConfig) (*PrecisePr
8787 pool := kvevents .NewPool (config .KVEventsConfig , kvCacheIndexer .KVBlockIndex ())
8888 pool .Start (ctx )
8989
90+ chatTemplateRenderer := preprocessing .NewChatTemplatingProcessor ()
91+ if err := chatTemplateRenderer .Initialize (); err != nil {
92+ return nil , fmt .Errorf ("failed to initialize chat templating processor: %w" , err )
93+ }
94+
9095 return & PrecisePrefixCacheScorer {
91- typedName : plugins.TypedName {Type : PrecisePrefixCachePluginType },
92- kvCacheIndexer : kvCacheIndexer ,
96+ typedName : plugins.TypedName {Type : PrecisePrefixCachePluginType },
97+ kvCacheIndexer : kvCacheIndexer ,
98+ chatTemplateRenderer : chatTemplateRenderer ,
9399 }, nil
94100}
95101
@@ -99,8 +105,9 @@ func New(ctx context.Context, config PrecisePrefixCachePluginConfig) (*PrecisePr
99105// state, and the `kvevents.Pool` to subscribe to KV-cache events
100106// to keep the internal KV-cache index state up-to-date.
101107type PrecisePrefixCacheScorer struct {
102- typedName plugins.TypedName
103- kvCacheIndexer * kvcache.Indexer
108+ typedName plugins.TypedName
109+ kvCacheIndexer * kvcache.Indexer
110+ chatTemplateRenderer * preprocessing.ChatTemplatingProcessor
104111}
105112
106113// TypedName returns the typed name of the plugin.
@@ -125,28 +132,19 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat
125132 }
126133
127134 // Extract the flattened prompt from the request
128- logger .V (logutil .DEBUG ).Info ("Extracting prompt from request" ,
129- "target_model" , request .TargetModel ,
130- "has_chat_completions" , request .Body != nil && request .Body .ChatCompletions != nil ,
131- "has_completions" , request .Body != nil && request .Body .Completions != nil )
132-
133135 prompt , err := s .extractPrompt (ctx , request )
134136 if err != nil {
135- logger .Error (err , "Failed to extract prompt from request" , "target_model" , request . TargetModel )
137+ logger .Error (err , "Failed to extract prompt from request" )
136138 return nil
137139 }
138140
139- logger .V (logutil .DEBUG ).Info ("Getting pod scores" ,
140- "prompt_length" , len (prompt ),
141- "target_model" , request .TargetModel )
142-
143141 scores , err := s .kvCacheIndexer .GetPodScores (ctx , prompt , request .TargetModel , nil )
144142 if err != nil {
145- logger .Error (err , "Failed to get pod scores" , "target_model" , request . TargetModel )
143+ logger .Error (err , "Failed to get pod scores" )
146144 return nil
147145 }
148146
149- logger .V (logutil .DEBUG ).Info ("Got pod scores" , "scores_count" , len ( scores ), "scores" , scores , "target_model" , request . TargetModel )
147+ logger .V (logutil .DEBUG ).Info ("Got pod scores" , "scores" , scores )
150148
151149 podToKey := func (pod types.Pod ) (string , bool ) {
152150 metricsPod := pod .GetPod ()
@@ -164,22 +162,15 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat
164162// For chat completions, it renders the messages using the model's chat template.
165163// For regular completions, it uses the prompt directly.
166164func (s * PrecisePrefixCacheScorer ) extractPrompt (ctx context.Context , request * types.LLMRequest ) (string , error ) {
167- logger := log .FromContext (ctx ).WithName (s .typedName .String ())
168-
169- // If it's a chat completion request, render the chat template.
170- // The upstream API guarantees exactly one of Completions or ChatCompletions is populated,
171- // but if both appear we prefer chat completions to match request semantics.
172- if request .Body != nil && request .Body .ChatCompletions != nil && request .Body .Completions != nil {
173- logger .V (logutil .DEBUG ).Info ("Both chat completions and completions present; prioritizing chat completions" , "target_model" , request .TargetModel )
174- }
165+ traceLogger := log .FromContext (ctx ).V (logutil .TRACE ).WithName (s .typedName .String ())
175166
176167 // The upstream parser guarantees exactly one body is populated, but we defensively prioritize chat completions.
177168 // If an unexpected dual payload slips through (parser regression/new client), log it and use chat semantics.
178169 if request .Body != nil && request .Body .ChatCompletions != nil {
179170 if request .Body .Completions != nil {
180- logger . V ( logutil . DEBUG ). Info ("Both chat_completions and completions present; defaulting to chat completions" , "target_model" , request . TargetModel )
171+ traceLogger . Info ("Both chat/completions and completions present; defaulting to chat/ completions" )
181172 }
182- logger . V ( logutil . DEBUG ) .Info ("Processing chat completion request" ,
173+ traceLogger .Info ("Processing chat completion request" ,
183174 "messages_count" , len (request .Body .ChatCompletions .Messages ),
184175 "target_model" , request .TargetModel )
185176
@@ -203,71 +194,47 @@ func (s *PrecisePrefixCacheScorer) extractPrompt(ctx context.Context, request *t
203194 })
204195 }
205196
206- // Initialize the chat templating processor
207- processor := preprocessing .NewChatTemplatingProcessor ()
208- if err := processor .Initialize (); err != nil {
209- return "" , fmt .Errorf ("failed to initialize chat templating processor: %w" , err )
210- }
211-
212197 // Fetch the chat template from the model
213198 fetchReq := preprocessing.FetchChatTemplateRequest {
214199 Model : request .TargetModel ,
215200 }
216- logger . V ( logutil . DEBUG ). Info ( "Fetching chat template" , "model" , request . TargetModel )
217- chatTemplate , chatTemplateKWArgs , err := processor .FetchChatTemplate (ctx , fetchReq )
201+
202+ chatTemplate , chatTemplateKWArgs , err := s . chatTemplateRenderer .FetchChatTemplate (ctx , fetchReq )
218203 if err != nil {
219- logger .Error (err , "Failed to fetch chat template" , "model" , request .TargetModel )
220204 return "" , fmt .Errorf ("failed to fetch chat template: %w" , err )
221205 }
222- logger .V (logutil .DEBUG ).Info ("Chat template fetched" ,
206+
207+ traceLogger .Info ("Chat template fetched" ,
223208 "model" , request .TargetModel ,
224- "template_length " , len (chatTemplate ),
225- "has_kwargs " , len (chatTemplateKWArgs ) > 0 )
209+ "templateLength " , len (chatTemplate ),
210+ "hasKwargs " , len (chatTemplateKWArgs ) > 0 )
226211
227212 // Set the fetched template in the render request
228213 renderReq .ChatTemplate = chatTemplate
229214 renderReq .ChatTemplateKWArgs = chatTemplateKWArgs
230215
231216 // Render the template to get flattened prompt
232- logger .V (logutil .DEBUG ).Info ("Rendering chat template" ,
233- "conversations_count" , len (renderReq .Conversations ))
234- resp , err := processor .RenderChatTemplate (ctx , renderReq )
217+ resp , err := s .chatTemplateRenderer .RenderChatTemplate (ctx , renderReq )
235218 if err != nil {
236- logger .Error (err , "Failed to render chat template" )
237219 return "" , fmt .Errorf ("failed to render chat template: %w" , err )
238220 }
239221
240222 if len (resp .RenderedChats ) == 0 {
241- logger .Error (nil , "No rendered chat returned from template rendering" )
242223 return "" , errors .New ("no rendered chat returned from template rendering" )
243224 }
244225
245226 prompt := resp .RenderedChats [0 ]
246- logger .V (logutil .DEBUG ).Info ("Chat template rendered successfully" , "prompt_length" , len (prompt ))
227+ traceLogger .Info ("Chat template rendered successfully" ,
228+ "promptLength" , len (prompt ))
247229 return prompt , nil
248230 }
249231
250232 // For regular completions, use the prompt directly
251233 if request .Body != nil && request .Body .Completions != nil {
252234 prompt := request .Body .Completions .Prompt
253- logger . V ( logutil . DEBUG ). Info ("Using completion prompt directly" , "prompt_length " , len (prompt ))
235+ traceLogger . Info ("Using completion prompt directly" , "promptLength " , len (prompt ))
254236 return prompt , nil
255237 }
256238
257- // Fallback: retain compatibility with legacy IGW versions (≤ v0.5.x) that extracted prompts
258- // directly from a raw `prompt` field (see gateway-api-inference-extension/pkg/epp/util/request/body.go).
259- if request .Body != nil {
260- // Try to marshal and extract prompt from raw data
261- if dataBytes , err := json .Marshal (request .Body ); err == nil {
262- var rawData map [string ]interface {}
263- if err := json .Unmarshal (dataBytes , & rawData ); err == nil {
264- if prompt , ok := rawData ["prompt" ].(string ); ok && prompt != "" {
265- logger .V (logutil .DEBUG ).Info ("Extracted prompt from raw data" , "prompt_length" , len (prompt ))
266- return prompt , nil
267- }
268- }
269- }
270- }
271-
272239 return "" , errors .New ("no valid prompt found in request" )
273240}
0 commit comments