
Commit e5908c0

Author: Workshop Participant

Fallback for model providers that do not support / expose cache token usage

1 parent 0efb76e
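
All three providers follow the same fallback: when the model API does not report prompt-cache usage, format_chunk still emits cacheWriteInputTokens and cacheReadInputTokens, hard-coded to 0, so downstream consumers of the metadata event always see the same usage keys. A minimal sketch of that pattern follows; the Usage TypedDict and the normalize_usage helper are illustrative assumptions based on the field names in this diff, not the SDK's actual types.

from typing import Any, TypedDict


class Usage(TypedDict):
    # Illustrative shape only: token counters carried in a metadata event.
    inputTokens: int
    outputTokens: int
    totalTokens: int
    cacheWriteInputTokens: int
    cacheReadInputTokens: int


def normalize_usage(raw: dict[str, Any]) -> Usage:
    # Providers without prompt caching report no cache counters; default them
    # to 0 so every metadata event carries the same keys.
    return Usage(
        inputTokens=raw["inputTokens"],
        outputTokens=raw["outputTokens"],
        totalTokens=raw["totalTokens"],
        cacheWriteInputTokens=raw.get("cacheWriteInputTokens", 0),
        cacheReadInputTokens=raw.get("cacheReadInputTokens", 0),
    )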

3 files changed: +9 -0 lines changed


src/strands/models/llamaapi.py

Lines changed: 3 additions & 0 deletions
@@ -310,6 +310,9 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
                 inputTokens=usage["inputTokens"],
                 outputTokens=usage["outputTokens"],
                 totalTokens=usage["totalTokens"],
+                # TODO does not seem to support caching as of July 2025
+                cacheWriteInputTokens=0,
+                cacheReadInputTokens=0,
             )
             return {
                 "metadata": {

src/strands/models/mistral.py

Lines changed: 3 additions & 0 deletions
@@ -342,6 +342,9 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
                     "inputTokens": usage.prompt_tokens,
                     "outputTokens": usage.completion_tokens,
                     "totalTokens": usage.total_tokens,
+                    # TODO does not seem to support caching as of July 2025
+                    "cacheWriteInputTokens": 0,
+                    "cacheReadInputTokens": 0,
                 },
                 "metrics": {
                     "latencyMs": event.get("latency_ms", 0),

src/strands/models/ollama.py

Lines changed: 3 additions & 0 deletions
@@ -272,6 +272,9 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
                     "inputTokens": event["data"].eval_count,
                     "outputTokens": event["data"].prompt_eval_count,
                     "totalTokens": event["data"].eval_count + event["data"].prompt_eval_count,
+                    # TODO add cache metrics
+                    "cacheWriteInputTokens": 0,
+                    "cacheReadInputTokens": 0,
                 },
                 "metrics": {
                     "latencyMs": event["data"].total_duration / 1e6,
