Skip to content

Commit cd7f004

Browse files
authored
minor refactoring (#482)
Signed-off-by: Maroon Ayoub <maroon.ayoub@ibm.com>
1 parent 4c77042 commit cd7f004

File tree

1 file changed

+10
-17
lines changed

1 file changed

+10
-17
lines changed

pkg/plugins/scorer/precise_prefix_cache.go

Lines changed: 10 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -96,11 +96,6 @@ func New(ctx context.Context, config PrecisePrefixCachePluginConfig) (*PrecisePr
9696
pool := kvevents.NewPool(config.KVEventsConfig, kvCacheIndexer.KVBlockIndex())
9797
pool.Start(ctx)
9898

99-
chatTemplateRenderer := preprocessing.NewChatTemplatingProcessor()
100-
if err := chatTemplateRenderer.Initialize(); err != nil {
101-
return nil, fmt.Errorf("failed to initialize chat templating processor: %w", err)
102-
}
103-
10499
return &PrecisePrefixCacheScorer{
105100
typedName: plugins.TypedName{Type: PrecisePrefixCachePluginType},
106101
kvCacheIndexer: kvCacheIndexer,
@@ -139,10 +134,9 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat
139134
return nil
140135
}
141136

142-
// Extract the flattened scores from the request
143137
scores, err := s.getScores(ctx, request)
144138
if err != nil {
145-
logger.Error(err, "Failed to extract scores from request")
139+
logger.Error(err, "Failed to get pod scores")
146140
return nil
147141
}
148142
debugLogger.Info("Got pod scores", "scores", scores)
@@ -159,17 +153,17 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat
159153
return indexedScoresToNormalizedScoredPods(pods, podToKey, scores)
160154
}
161155

162-
// extractPrompt extracts the flattened prompt from the request.
163-
// For chat completions, it renders the messages using the model's chat template.
164-
// For regular completions, it uses the prompt directly.
156+
// getScores retrieves the pod scores from the KV-cache indexer
157+
// based on the provided LLM request.
158+
// If the request contains chat completions, it processes them accordingly.
159+
// If the request contains regular completions, it uses the prompt directly.
165160
func (s *PrecisePrefixCacheScorer) getScores(ctx context.Context, request *types.LLMRequest) (map[string]float64, error) {
166161
logger := log.FromContext(ctx).WithName(s.typedName.String())
167162
traceLogger := logger.V(logutil.TRACE)
168163

169164
traceLogger.Info("Getting scores",
170-
"target_model", request.TargetModel,
171-
"has_chat_completions", request.Body != nil && request.Body.ChatCompletions != nil,
172-
"has_completions", request.Body != nil && request.Body.Completions != nil)
165+
"isChatCompletions", request.Body != nil && request.Body.ChatCompletions != nil,
166+
"isCompletions", request.Body != nil && request.Body.Completions != nil)
173167

174168
// The upstream parser guarantees exactly one body is populated, but we defensively prioritize chat completions.
175169
// If an unexpected dual payload slips through (parser regression/new client), log it and use chat semantics.
@@ -198,10 +192,9 @@ func (s *PrecisePrefixCacheScorer) getScores(ctx context.Context, request *types
198192
}
199193

200194
traceLogger.Info("Processing chat completion request",
201-
"messages_count", len(renderReq.Conversations),
202-
"tools_count", len(renderReq.Tools),
203-
"documents_count", len(renderReq.Documents),
204-
"target_model", request.TargetModel)
195+
"messagesCount", len(renderReq.Conversations),
196+
"toolsCount", len(renderReq.Tools),
197+
"documentsCount", len(renderReq.Documents))
205198

206199
scores, err := s.kvCacheIndexer.GetPodScores(ctx, renderReq, "", request.TargetModel, nil)
207200
if err != nil {

0 commit comments

Comments
 (0)