Commit 0efb76e

Workshop Participant committed:
Reads cache tokens from litellm response (openai, anthropic, bedrock)

1 parent a42d4f7, commit 0efb76e

1 file changed: +9 -1 lines

src/strands/models/litellm.py (9 additions, 1 deletion)
```diff
@@ -158,7 +158,15 @@ async def stream(self, request: dict[str, Any]) -> AsyncGenerator[dict[str, Any]
         for event in response:
             _ = event
 
-        yield {"chunk_type": "metadata", "data": event.usage}
+        usage = event.usage
+        cache_read = max(
+            getattr(usage, "cache_read_input_tokens", 0),
+            getattr(getattr(usage, "prompt_tokens_details", {}), "cached_tokens", 0),
+        )
+
+        usage.prompt_tokens_details.cached_tokens = cache_read
+
+        yield {"chunk_type": "metadata", "data": usage}
 
     @override
     async def structured_output(
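```

For readers skimming the diff: the two `getattr` fallbacks cover the two shapes a litellm usage object can take for cache-read counts. Anthropic and Bedrock responses expose a top-level `cache_read_input_tokens` counter, while OpenAI responses nest the count under `prompt_tokens_details.cached_tokens`; the change takes the larger of the two and folds it into the OpenAI-style field so downstream consumers read a single place. Below is a minimal, runnable sketch of that normalization. The `SimpleNamespace` stand-ins and the `normalize_cache_read` helper are illustrative assumptions, not part of the library; only the `max`/`getattr` expression is taken verbatim from the commit.

```python
# A minimal sketch of the normalization above, using stand-in usage objects.
# Real litellm responses carry library-defined usage types; SimpleNamespace
# is used here only to make the example self-contained. (Assumption: the
# helper name and the stand-in shapes are hypothetical.)
from types import SimpleNamespace


def normalize_cache_read(usage):
    """Fold provider-specific cache-read counters into one canonical field.

    Mirrors the expression from the commit: Anthropic/Bedrock report a
    top-level cache_read_input_tokens; OpenAI reports
    prompt_tokens_details.cached_tokens. Whichever is populated wins.
    """
    cache_read = max(
        getattr(usage, "cache_read_input_tokens", 0),
        getattr(getattr(usage, "prompt_tokens_details", {}), "cached_tokens", 0),
    )
    usage.prompt_tokens_details.cached_tokens = cache_read
    return usage


# Anthropic/Bedrock-shaped usage: top-level counter populated.
anthropic_usage = SimpleNamespace(
    cache_read_input_tokens=128,
    prompt_tokens_details=SimpleNamespace(cached_tokens=0),
)

# OpenAI-shaped usage: nested counter populated.
openai_usage = SimpleNamespace(
    cache_read_input_tokens=0,
    prompt_tokens_details=SimpleNamespace(cached_tokens=256),
)

assert normalize_cache_read(anthropic_usage).prompt_tokens_details.cached_tokens == 128
assert normalize_cache_read(openai_usage).prompt_tokens_details.cached_tokens == 256
```

One thing worth noting about the committed expression: the nested `getattr` chain only guards the read. The final assignment still assumes `usage.prompt_tokens_details` exists on the response object, so a usage payload without that attribute would raise `AttributeError` at the write.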
