Skip to content

Commit f2048dc

Browse files
authored
- removed redundant logging (#473)
- refactored a redundant, repeated call

Signed-off-by: Maroon Ayoub <maroon.ayoub@ibm.com>
1 parent f8d774b commit f2048dc

File tree

1 file changed

+27
-60
lines changed

1 file changed

+27
-60
lines changed

pkg/plugins/scorer/precise_prefix_cache.go

Lines changed: 27 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,15 @@ func New(ctx context.Context, config PrecisePrefixCachePluginConfig) (*PrecisePr
8787
pool := kvevents.NewPool(config.KVEventsConfig, kvCacheIndexer.KVBlockIndex())
8888
pool.Start(ctx)
8989

90+
chatTemplateRenderer := preprocessing.NewChatTemplatingProcessor()
91+
if err := chatTemplateRenderer.Initialize(); err != nil {
92+
return nil, fmt.Errorf("failed to initialize chat templating processor: %w", err)
93+
}
94+
9095
return &PrecisePrefixCacheScorer{
91-
typedName: plugins.TypedName{Type: PrecisePrefixCachePluginType},
92-
kvCacheIndexer: kvCacheIndexer,
96+
typedName: plugins.TypedName{Type: PrecisePrefixCachePluginType},
97+
kvCacheIndexer: kvCacheIndexer,
98+
chatTemplateRenderer: chatTemplateRenderer,
9399
}, nil
94100
}
95101

@@ -99,8 +105,9 @@ func New(ctx context.Context, config PrecisePrefixCachePluginConfig) (*PrecisePr
99105
// state, and the `kvevents.Pool` to subscribe to KV-cache events
100106
// to keep the internal KV-cache index state up-to-date.
101107
type PrecisePrefixCacheScorer struct {
102-
typedName plugins.TypedName
103-
kvCacheIndexer *kvcache.Indexer
108+
typedName plugins.TypedName
109+
kvCacheIndexer *kvcache.Indexer
110+
chatTemplateRenderer *preprocessing.ChatTemplatingProcessor
104111
}
105112

106113
// TypedName returns the typed name of the plugin.
@@ -125,28 +132,19 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat
125132
}
126133

127134
// Extract the flattened prompt from the request
128-
logger.V(logutil.DEBUG).Info("Extracting prompt from request",
129-
"target_model", request.TargetModel,
130-
"has_chat_completions", request.Body != nil && request.Body.ChatCompletions != nil,
131-
"has_completions", request.Body != nil && request.Body.Completions != nil)
132-
133135
prompt, err := s.extractPrompt(ctx, request)
134136
if err != nil {
135-
logger.Error(err, "Failed to extract prompt from request", "target_model", request.TargetModel)
137+
logger.Error(err, "Failed to extract prompt from request")
136138
return nil
137139
}
138140

139-
logger.V(logutil.DEBUG).Info("Getting pod scores",
140-
"prompt_length", len(prompt),
141-
"target_model", request.TargetModel)
142-
143141
scores, err := s.kvCacheIndexer.GetPodScores(ctx, prompt, request.TargetModel, nil)
144142
if err != nil {
145-
logger.Error(err, "Failed to get pod scores", "target_model", request.TargetModel)
143+
logger.Error(err, "Failed to get pod scores")
146144
return nil
147145
}
148146

149-
logger.V(logutil.DEBUG).Info("Got pod scores", "scores_count", len(scores), "scores", scores, "target_model", request.TargetModel)
147+
logger.V(logutil.DEBUG).Info("Got pod scores", "scores", scores)
150148

151149
podToKey := func(pod types.Pod) (string, bool) {
152150
metricsPod := pod.GetPod()
@@ -164,22 +162,15 @@ func (s *PrecisePrefixCacheScorer) Score(ctx context.Context, _ *types.CycleStat
164162
// For chat completions, it renders the messages using the model's chat template.
165163
// For regular completions, it uses the prompt directly.
166164
func (s *PrecisePrefixCacheScorer) extractPrompt(ctx context.Context, request *types.LLMRequest) (string, error) {
167-
logger := log.FromContext(ctx).WithName(s.typedName.String())
168-
169-
// If it's a chat completion request, render the chat template.
170-
// The upstream API guarantees exactly one of Completions or ChatCompletions is populated,
171-
// but if both appear we prefer chat completions to match request semantics.
172-
if request.Body != nil && request.Body.ChatCompletions != nil && request.Body.Completions != nil {
173-
logger.V(logutil.DEBUG).Info("Both chat completions and completions present; prioritizing chat completions", "target_model", request.TargetModel)
174-
}
165+
traceLogger := log.FromContext(ctx).V(logutil.TRACE).WithName(s.typedName.String())
175166

176167
// The upstream parser guarantees exactly one body is populated, but we defensively prioritize chat completions.
177168
// If an unexpected dual payload slips through (parser regression/new client), log it and use chat semantics.
178169
if request.Body != nil && request.Body.ChatCompletions != nil {
179170
if request.Body.Completions != nil {
180-
logger.V(logutil.DEBUG).Info("Both chat_completions and completions present; defaulting to chat completions", "target_model", request.TargetModel)
171+
traceLogger.Info("Both chat/completions and completions present; defaulting to chat/completions")
181172
}
182-
logger.V(logutil.DEBUG).Info("Processing chat completion request",
173+
traceLogger.Info("Processing chat completion request",
183174
"messages_count", len(request.Body.ChatCompletions.Messages),
184175
"target_model", request.TargetModel)
185176

@@ -203,71 +194,47 @@ func (s *PrecisePrefixCacheScorer) extractPrompt(ctx context.Context, request *t
203194
})
204195
}
205196

206-
// Initialize the chat templating processor
207-
processor := preprocessing.NewChatTemplatingProcessor()
208-
if err := processor.Initialize(); err != nil {
209-
return "", fmt.Errorf("failed to initialize chat templating processor: %w", err)
210-
}
211-
212197
// Fetch the chat template from the model
213198
fetchReq := preprocessing.FetchChatTemplateRequest{
214199
Model: request.TargetModel,
215200
}
216-
logger.V(logutil.DEBUG).Info("Fetching chat template", "model", request.TargetModel)
217-
chatTemplate, chatTemplateKWArgs, err := processor.FetchChatTemplate(ctx, fetchReq)
201+
202+
chatTemplate, chatTemplateKWArgs, err := s.chatTemplateRenderer.FetchChatTemplate(ctx, fetchReq)
218203
if err != nil {
219-
logger.Error(err, "Failed to fetch chat template", "model", request.TargetModel)
220204
return "", fmt.Errorf("failed to fetch chat template: %w", err)
221205
}
222-
logger.V(logutil.DEBUG).Info("Chat template fetched",
206+
207+
traceLogger.Info("Chat template fetched",
223208
"model", request.TargetModel,
224-
"template_length", len(chatTemplate),
225-
"has_kwargs", len(chatTemplateKWArgs) > 0)
209+
"templateLength", len(chatTemplate),
210+
"hasKwargs", len(chatTemplateKWArgs) > 0)
226211

227212
// Set the fetched template in the render request
228213
renderReq.ChatTemplate = chatTemplate
229214
renderReq.ChatTemplateKWArgs = chatTemplateKWArgs
230215

231216
// Render the template to get flattened prompt
232-
logger.V(logutil.DEBUG).Info("Rendering chat template",
233-
"conversations_count", len(renderReq.Conversations))
234-
resp, err := processor.RenderChatTemplate(ctx, renderReq)
217+
resp, err := s.chatTemplateRenderer.RenderChatTemplate(ctx, renderReq)
235218
if err != nil {
236-
logger.Error(err, "Failed to render chat template")
237219
return "", fmt.Errorf("failed to render chat template: %w", err)
238220
}
239221

240222
if len(resp.RenderedChats) == 0 {
241-
logger.Error(nil, "No rendered chat returned from template rendering")
242223
return "", errors.New("no rendered chat returned from template rendering")
243224
}
244225

245226
prompt := resp.RenderedChats[0]
246-
logger.V(logutil.DEBUG).Info("Chat template rendered successfully", "prompt_length", len(prompt))
227+
traceLogger.Info("Chat template rendered successfully",
228+
"promptLength", len(prompt))
247229
return prompt, nil
248230
}
249231

250232
// For regular completions, use the prompt directly
251233
if request.Body != nil && request.Body.Completions != nil {
252234
prompt := request.Body.Completions.Prompt
253-
logger.V(logutil.DEBUG).Info("Using completion prompt directly", "prompt_length", len(prompt))
235+
traceLogger.Info("Using completion prompt directly", "promptLength", len(prompt))
254236
return prompt, nil
255237
}
256238

257-
// Fallback: retain compatibility with legacy IGW versions (≤ v0.5.x) that extracted prompts
258-
// directly from a raw `prompt` field (see gateway-api-inference-extension/pkg/epp/util/request/body.go).
259-
if request.Body != nil {
260-
// Try to marshal and extract prompt from raw data
261-
if dataBytes, err := json.Marshal(request.Body); err == nil {
262-
var rawData map[string]interface{}
263-
if err := json.Unmarshal(dataBytes, &rawData); err == nil {
264-
if prompt, ok := rawData["prompt"].(string); ok && prompt != "" {
265-
logger.V(logutil.DEBUG).Info("Extracted prompt from raw data", "prompt_length", len(prompt))
266-
return prompt, nil
267-
}
268-
}
269-
}
270-
}
271-
272239
return "", errors.New("no valid prompt found in request")
273240
}

0 commit comments

Comments (0)