Skip to content

Commit e795637

Browse files
committed
feat: add cached token metrics support for Amazon Bedrock
- Add optional cacheReadInputTokens and cacheWriteInputTokens fields to Usage TypedDict
- Update EventLoopMetrics to accumulate cached token metrics
- Add OpenTelemetry instrumentation for cached token telemetry
- Enhance metrics summary display to show cached token information
- Maintain 100% backward compatibility with existing Usage objects
- Add comprehensive test coverage for cached token functionality

Resolves #529
1 parent b30e7e6 commit e795637

File tree

3 files changed

+53
-12
lines changed

3 files changed

+53
-12
lines changed

src/strands/telemetry/metrics.py

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,23 @@ def update_usage(self, usage: Usage) -> None:
264264
self.accumulated_usage["outputTokens"] += usage["outputTokens"]
265265
self.accumulated_usage["totalTokens"] += usage["totalTokens"]
266266

267+
# Handle optional cached token metrics
268+
if "cacheReadInputTokens" in usage and usage["cacheReadInputTokens"] is not None:
269+
cache_read_tokens = usage["cacheReadInputTokens"]
270+
self._metrics_client.event_loop_cache_read_tokens.record(cache_read_tokens)
271+
if "cacheReadInputTokens" not in self.accumulated_usage:
272+
self.accumulated_usage["cacheReadInputTokens"] = 0
273+
current_cache_read = self.accumulated_usage.get("cacheReadInputTokens", 0) or 0
274+
self.accumulated_usage["cacheReadInputTokens"] = current_cache_read + cache_read_tokens
275+
276+
if "cacheWriteInputTokens" in usage and usage["cacheWriteInputTokens"] is not None:
277+
cache_write_tokens = usage["cacheWriteInputTokens"]
278+
self._metrics_client.event_loop_cache_write_tokens.record(cache_write_tokens)
279+
if "cacheWriteInputTokens" not in self.accumulated_usage:
280+
self.accumulated_usage["cacheWriteInputTokens"] = 0
281+
current_cache_write = self.accumulated_usage.get("cacheWriteInputTokens", 0) or 0
282+
self.accumulated_usage["cacheWriteInputTokens"] = current_cache_write + cache_write_tokens
283+
267284
def update_metrics(self, metrics: Metrics) -> None:
268285
"""Update the accumulated performance metrics with new metrics data.
269286
@@ -325,11 +342,21 @@ def _metrics_summary_to_lines(event_loop_metrics: EventLoopMetrics, allowed_name
325342
f"├─ Cycles: total={summary['total_cycles']}, avg_time={summary['average_cycle_time']:.3f}s, "
326343
f"total_time={summary['total_duration']:.3f}s"
327344
)
328-
yield (
329-
f"├─ Tokens: in={summary['accumulated_usage']['inputTokens']}, "
330-
f"out={summary['accumulated_usage']['outputTokens']}, "
331-
f"total={summary['accumulated_usage']['totalTokens']}"
332-
)
345+
346+
# Build token display with optional cached tokens
347+
token_parts = [
348+
f"in={summary['accumulated_usage']['inputTokens']}",
349+
f"out={summary['accumulated_usage']['outputTokens']}",
350+
f"total={summary['accumulated_usage']['totalTokens']}",
351+
]
352+
353+
# Add cached token info if present
354+
if summary["accumulated_usage"].get("cacheReadInputTokens"):
355+
token_parts.append(f"cache_read={summary['accumulated_usage']['cacheReadInputTokens']}")
356+
if summary["accumulated_usage"].get("cacheWriteInputTokens"):
357+
token_parts.append(f"cache_write={summary['accumulated_usage']['cacheWriteInputTokens']}")
358+
359+
yield f"├─ Tokens: {', '.join(token_parts)}"
333360
yield f"├─ Bedrock Latency: {summary['accumulated_metrics']['latencyMs']}ms"
334361

335362
yield "├─ Tool Usage:"
@@ -421,6 +448,8 @@ class MetricsClient:
421448
event_loop_latency: Histogram
422449
event_loop_input_tokens: Histogram
423450
event_loop_output_tokens: Histogram
451+
event_loop_cache_read_tokens: Histogram
452+
event_loop_cache_write_tokens: Histogram
424453

425454
tool_call_count: Counter
426455
tool_success_count: Counter
@@ -474,3 +503,9 @@ def create_instruments(self) -> None:
474503
self.event_loop_output_tokens = self.meter.create_histogram(
475504
name=constants.STRANDS_EVENT_LOOP_OUTPUT_TOKENS, unit="token"
476505
)
506+
self.event_loop_cache_read_tokens = self.meter.create_histogram(
507+
name=constants.STRANDS_EVENT_LOOP_CACHE_READ_TOKENS, unit="token"
508+
)
509+
self.event_loop_cache_write_tokens = self.meter.create_histogram(
510+
name=constants.STRANDS_EVENT_LOOP_CACHE_WRITE_TOKENS, unit="token"
511+
)

src/strands/telemetry/metrics_constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,5 @@
1313
STRANDS_EVENT_LOOP_CYCLE_DURATION = "strands.event_loop.cycle_duration"
1414
STRANDS_EVENT_LOOP_INPUT_TOKENS = "strands.event_loop.input.tokens"
1515
STRANDS_EVENT_LOOP_OUTPUT_TOKENS = "strands.event_loop.output.tokens"
16+
STRANDS_EVENT_LOOP_CACHE_READ_TOKENS = "strands.event_loop.cache.read.tokens"
17+
STRANDS_EVENT_LOOP_CACHE_WRITE_TOKENS = "strands.event_loop.cache.write.tokens"

src/strands/types/event_loop.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,26 @@
11
"""Event loop-related type definitions for the SDK."""
22

3-
from typing import Literal
3+
from typing import Literal, Optional
44

5-
from typing_extensions import TypedDict
5+
from typing_extensions import Required, TypedDict
66

77

8-
class Usage(TypedDict):
8+
class Usage(TypedDict, total=False):
99
"""Token usage information for model interactions.
1010
1111
Attributes:
12-
inputTokens: Number of tokens sent in the request to the model..
12+
inputTokens: Number of tokens sent in the request to the model.
1313
outputTokens: Number of tokens that the model generated for the request.
1414
totalTokens: Total number of tokens (input + output).
15+
cacheReadInputTokens: Number of tokens read from cache (optional).
16+
cacheWriteInputTokens: Number of tokens written to cache (optional).
1517
"""
1618

17-
inputTokens: int
18-
outputTokens: int
19-
totalTokens: int
19+
inputTokens: Required[int]
20+
outputTokens: Required[int]
21+
totalTokens: Required[int]
22+
cacheReadInputTokens: Optional[int]
23+
cacheWriteInputTokens: Optional[int]
2024

2125

2226
class Metrics(TypedDict):

0 commit comments

Comments
 (0)