@@ -41,14 +41,23 @@ var _ framework.Scorer = &PrecisePrefixCacheScorer{}
4141// a new instance of the PrefixCacheTrackingPlugin.
4242func PrecisePrefixCachePluginFactory (name string , rawParameters json.RawMessage ,
4343 handle plugins.Handle ) (plugins.Plugin , error ) {
44+
45+ indexerConfig , err := kvcache .NewDefaultConfig ()
46+ if err != nil {
47+ return nil , fmt .Errorf ("failed to initialize indexer config: %w" , err )
48+ }
49+
4450 parameters := PrecisePrefixCachePluginConfig {
45- IndexerConfig : kvcache . NewDefaultConfig () ,
51+ IndexerConfig : indexerConfig ,
4652 KVEventsConfig : kvevents .DefaultConfig (),
4753 }
4854
4955 // read hugging face token from environment variable if set
50- if token := os .Getenv ("HF_TOKEN" ); token != "" {
51- parameters .IndexerConfig .TokenizersPoolConfig .HuggingFaceToken = token
56+ if token := os .Getenv ("HF_TOKEN" ); token != "" &&
57+ parameters .IndexerConfig != nil &&
58+ parameters .IndexerConfig .TokenizersPoolConfig != nil &&
59+ parameters .IndexerConfig .TokenizersPoolConfig .HFTokenizerConfig != nil {
60+ parameters .IndexerConfig .TokenizersPoolConfig .HFTokenizerConfig .HuggingFaceToken = token
5261 }
5362
5463 if rawParameters != nil {
@@ -93,9 +102,8 @@ func New(ctx context.Context, config PrecisePrefixCachePluginConfig) (*PrecisePr
93102 }
94103
95104 return & PrecisePrefixCacheScorer {
96- typedName : plugins.TypedName {Type : PrecisePrefixCachePluginType },
97- kvCacheIndexer : kvCacheIndexer ,
98- chatTemplateRenderer : chatTemplateRenderer ,
105+ typedName : plugins.TypedName {Type : PrecisePrefixCachePluginType },
106+ kvCacheIndexer : kvCacheIndexer ,
99107 }, nil
100108}
101109
@@ -105,9 +113,8 @@ func New(ctx context.Context, config PrecisePrefixCachePluginConfig) (*PrecisePr
105113// state, and the `kvevents.Pool` to subscribe to KV-cache events
106114// to keep the internal KV-cache index state up-to-date.
107115type PrecisePrefixCacheScorer struct {
108- typedName plugins.TypedName
109- kvCacheIndexer * kvcache.Indexer
110- chatTemplateRenderer * preprocessing.ChatTemplatingProcessor
116+ typedName plugins.TypedName
117+ kvCacheIndexer * kvcache.Indexer
111118}
112119
113120// TypedName returns the typed name of the plugin.
@@ -125,26 +132,20 @@ func (s *PrecisePrefixCacheScorer) WithName(name string) *PrecisePrefixCacheScor
125132// The returned scores are normalized to a range of 0-1.
126133func (s * PrecisePrefixCacheScorer ) Score (ctx context.Context , _ * types.CycleState , request * types.LLMRequest , pods []types.Pod ) map [types.Pod ]float64 {
127134 logger := log .FromContext (ctx ).WithName (s .typedName .String ())
135+ debugLogger := logger .V (logutil .DEBUG )
128136
129137 if request == nil {
130- logger .V (logutil .DEBUG ).Info ("Request is nil, skipping scoring" )
131- return nil
132- }
133-
134- // Extract the flattened prompt from the request
135- prompt , err := s .extractPrompt (ctx , request )
136- if err != nil {
137- logger .Error (err , "Failed to extract prompt from request" )
138+ debugLogger .Info ("Request is nil, skipping scoring" )
138139 return nil
139140 }
140141
141- scores , err := s .kvCacheIndexer .GetPodScores (ctx , prompt , request .TargetModel , nil )
142+ // Extract the flattened scores from the request
143+ scores , err := s .getScores (ctx , request )
142144 if err != nil {
143- logger .Error (err , "Failed to get pod scores" )
145+ logger .Error (err , "Failed to extract scores from request " )
144146 return nil
145147 }
146-
147- logger .V (logutil .DEBUG ).Info ("Got pod scores" , "scores" , scores )
148+ debugLogger .Info ("Got pod scores" , "scores" , scores )
148149
149150 podToKey := func (pod types.Pod ) (string , bool ) {
150151 metricsPod := pod .GetPod ()
@@ -161,20 +162,22 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat
161162// getScores returns the scores reported by the KV-cache indexer for the request.
162163// For chat completions, it builds a render request from the messages and passes it to the indexer.
163164// For regular completions, it passes the prompt to the indexer directly.
164- func (s * PrecisePrefixCacheScorer ) extractPrompt (ctx context.Context , request * types.LLMRequest ) (string , error ) {
165- traceLogger := log .FromContext (ctx ).V (logutil .TRACE ).WithName (s .typedName .String ())
165+ func (s * PrecisePrefixCacheScorer ) getScores (ctx context.Context , request * types.LLMRequest ) (map [string ]float64 , error ) {
166+ logger := log .FromContext (ctx ).WithName (s .typedName .String ())
167+ traceLogger := logger .V (logutil .TRACE )
168+
169+ traceLogger .Info ("Getting scores" ,
170+ "target_model" , request .TargetModel ,
171+ "has_chat_completions" , request .Body != nil && request .Body .ChatCompletions != nil ,
172+ "has_completions" , request .Body != nil && request .Body .Completions != nil )
166173
167174 // The upstream parser guarantees exactly one body is populated, but we defensively prioritize chat completions.
168175 // If an unexpected dual payload slips through (parser regression/new client), log it and use chat semantics.
169176 if request .Body != nil && request .Body .ChatCompletions != nil {
170177 if request .Body .Completions != nil {
171178 traceLogger .Info ("Both chat/completions and completions present; defaulting to chat/completions" )
172179 }
173- traceLogger .Info ("Processing chat completion request" ,
174- "messages_count" , len (request .Body .ChatCompletions .Messages ),
175- "target_model" , request .TargetModel )
176180
177- // Create render request
178181 renderReq := & preprocessing.RenderJinjaTemplateRequest {
179182 Conversations : make ([]preprocessing.ChatMessage , 0 ),
180183 Tools : request .Body .ChatCompletions .Tools ,
@@ -194,47 +197,30 @@ func (s *PrecisePrefixCacheScorer) extractPrompt(ctx context.Context, request *t
194197 })
195198 }
196199
197- // Fetch the chat template from the model
198- fetchReq := preprocessing.FetchChatTemplateRequest {
199- Model : request .TargetModel ,
200- }
201-
202- chatTemplate , chatTemplateKWArgs , err := s .chatTemplateRenderer .FetchChatTemplate (ctx , fetchReq )
203- if err != nil {
204- return "" , fmt .Errorf ("failed to fetch chat template: %w" , err )
205- }
206-
207- traceLogger .Info ("Chat template fetched" ,
208- "model" , request .TargetModel ,
209- "templateLength" , len (chatTemplate ),
210- "hasKwargs" , len (chatTemplateKWArgs ) > 0 )
211-
212- // Set the fetched template in the render request
213- renderReq .ChatTemplate = chatTemplate
214- renderReq .ChatTemplateKWArgs = chatTemplateKWArgs
200+ traceLogger .Info ("Processing chat completion request" ,
201+ "messages_count" , len (renderReq .Conversations ),
202+ "tools_count" , len (renderReq .Tools ),
203+ "documents_count" , len (renderReq .Documents ),
204+ "target_model" , request .TargetModel )
215205
216- // Render the template to get flattened prompt
217- resp , err := s .chatTemplateRenderer .RenderChatTemplate (ctx , renderReq )
206+ scores , err := s .kvCacheIndexer .GetPodScores (ctx , renderReq , "" , request .TargetModel , nil )
218207 if err != nil {
219- return "" , fmt .Errorf ("failed to render chat template : %w" , err )
208+ return nil , fmt .Errorf ("failed to get pod scores for chat/completions : %w" , err )
220209 }
221-
222- if len (resp .RenderedChats ) == 0 {
223- return "" , errors .New ("no rendered chat returned from template rendering" )
224- }
225-
226- prompt := resp .RenderedChats [0 ]
227- traceLogger .Info ("Chat template rendered successfully" ,
228- "promptLength" , len (prompt ))
229- return prompt , nil
210+ return scores , nil
230211 }
231212
232213 // For regular completions, use the prompt directly
233214 if request .Body != nil && request .Body .Completions != nil {
234215 prompt := request .Body .Completions .Prompt
235216 traceLogger .Info ("Using completion prompt directly" , "promptLength" , len (prompt ))
236- return prompt , nil
217+
218+ scores , err := s .kvCacheIndexer .GetPodScores (ctx , nil , prompt , request .TargetModel , nil )
219+ if err != nil {
220+ return nil , fmt .Errorf ("failed to get pod scores for completions: %w" , err )
221+ }
222+ return scores , nil
237223 }
238224
239- return "" , errors .New ("no valid prompt found in request" )
225+ return nil , errors .New ("no valid input found in request" )
240226}
0 commit comments