1919 BinaryContent ,
2020 BuiltinToolCallPart ,
2121 BuiltinToolReturnPart ,
22+ CachePoint ,
2223 DocumentUrl ,
2324 FilePart ,
2425 FinishReason ,
5859 from anthropic .types .beta import (
5960 BetaBase64PDFBlockParam ,
6061 BetaBase64PDFSourceParam ,
62+ BetaCacheControlEphemeralParam ,
6163 BetaCitationsDelta ,
6264 BetaCodeExecutionTool20250522Param ,
6365 BetaCodeExecutionToolResultBlock ,
@@ -148,6 +150,22 @@ class AnthropicModelSettings(ModelSettings, total=False):
148150 See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information.
149151 """
150152
153+ anthropic_cache_tool_definitions : bool
154+ """Whether to add `cache_control` to the last tool definition.
155+
156+ When enabled, the last tool in the `tools` array will have `cache_control` set,
157+ allowing Anthropic to cache tool definitions and reduce costs.
158+ See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
159+ """
160+
161+ anthropic_cache_instructions : bool
162+ """Whether to add `cache_control` to the last system prompt block.
163+
164+ When enabled, the last system prompt will have `cache_control` set,
165+ allowing Anthropic to cache system instructions and reduce costs.
166+ See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
167+ """
168+
151169
152170@dataclass (init = False )
153171class AnthropicModel (Model ):
@@ -289,7 +307,7 @@ async def _messages_create(
289307 model_request_parameters : ModelRequestParameters ,
290308 ) -> BetaMessage | AsyncStream [BetaRawMessageStreamEvent ]:
291309 # standalone function to make it easier to override
292- tools = self ._get_tools (model_request_parameters )
310+ tools = self ._get_tools (model_request_parameters , model_settings )
293311 tools , mcp_servers , beta_features = self ._add_builtin_tools (tools , model_request_parameters )
294312
295313 tool_choice : BetaToolChoiceParam | None
@@ -305,7 +323,7 @@ async def _messages_create(
305323 if (allow_parallel_tool_calls := model_settings .get ('parallel_tool_calls' )) is not None :
306324 tool_choice ['disable_parallel_tool_use' ] = not allow_parallel_tool_calls
307325
308- system_prompt , anthropic_messages = await self ._map_message (messages , model_request_parameters )
326+ system_prompt , anthropic_messages = await self ._map_message (messages , model_request_parameters , model_settings )
309327
310328 try :
311329 extra_headers = model_settings .get ('extra_headers' , {})
@@ -411,8 +429,19 @@ async def _process_streamed_response(
411429 _provider_url = self ._provider .base_url ,
412430 )
413431
414- def _get_tools (self , model_request_parameters : ModelRequestParameters ) -> list [BetaToolUnionParam ]:
415- return [self ._map_tool_definition (r ) for r in model_request_parameters .tool_defs .values ()]
def _get_tools(
    self, model_request_parameters: ModelRequestParameters, model_settings: AnthropicModelSettings
) -> list[BetaToolUnionParam]:
    """Convert the request's tool definitions into Anthropic tool params.

    Every value in `model_request_parameters.tool_defs` is passed through
    `self._map_tool_definition`. If at least one tool was produced and the
    `anthropic_cache_tool_definitions` setting is truthy, the final tool in the
    list is tagged with an ephemeral `cache_control` entry.

    Returns:
        The mapped tool params, in the iteration order of `tool_defs`.
    """
    mapped_tools: list[BetaToolUnionParam] = []
    for tool_def in model_request_parameters.tool_defs.values():
        mapped_tools.append(self._map_tool_definition(tool_def))

    # Nothing to tag when there are no tools or caching was not requested;
    # the setting is only consulted when the list is non-empty.
    if not mapped_tools or not model_settings.get('anthropic_cache_tool_definitions'):
        return mapped_tools

    # The marker goes on the last tool definition only.
    mapped_tools[-1]['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
    return mapped_tools
416445
417446 def _add_builtin_tools (
418447 self , tools : list [BetaToolUnionParam ], model_request_parameters : ModelRequestParameters
@@ -464,8 +493,11 @@ def _add_builtin_tools(
464493 return tools , mcp_servers , beta_features
465494
466495 async def _map_message ( # noqa: C901
467- self , messages : list [ModelMessage ], model_request_parameters : ModelRequestParameters
468- ) -> tuple [str , list [BetaMessageParam ]]:
496+ self ,
497+ messages : list [ModelMessage ],
498+ model_request_parameters : ModelRequestParameters ,
499+ model_settings : AnthropicModelSettings ,
500+ ) -> tuple [str | list [BetaTextBlockParam ], list [BetaMessageParam ]]:
469501 """Just maps a `pydantic_ai.Message` to a `anthropic.types.MessageParam`."""
470502 system_prompt_parts : list [str ] = []
471503 anthropic_messages : list [BetaMessageParam ] = []
@@ -477,7 +509,10 @@ async def _map_message( # noqa: C901
477509 system_prompt_parts .append (request_part .content )
478510 elif isinstance (request_part , UserPromptPart ):
479511 async for content in self ._map_user_prompt (request_part ):
480- user_content_params .append (content )
512+ if isinstance (content , CachePoint ):
513+ self ._add_cache_control_to_last_param (user_content_params )
514+ else :
515+ user_content_params .append (content )
481516 elif isinstance (request_part , ToolReturnPart ):
482517 tool_result_block_param = BetaToolResultBlockParam (
483518 tool_use_id = _guard_tool_call_id (t = request_part ),
@@ -637,12 +672,46 @@ async def _map_message( # noqa: C901
637672 if instructions := self ._get_instructions (messages , model_request_parameters ):
638673 system_prompt_parts .insert (0 , instructions )
639674 system_prompt = '\n \n ' .join (system_prompt_parts )
675+
676+ # If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control
677+ if system_prompt and model_settings .get ('anthropic_cache_instructions' ):
678+ system_prompt_blocks = [
679+ BetaTextBlockParam (
680+ type = 'text' , text = system_prompt , cache_control = BetaCacheControlEphemeralParam (type = 'ephemeral' )
681+ )
682+ ]
683+ return system_prompt_blocks , anthropic_messages
684+
640685 return system_prompt , anthropic_messages
641686
@staticmethod
def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None:
    """Attach an ephemeral `cache_control` marker to the last content block in `params`.

    The last element of `params` is mutated in place; nothing is returned.

    See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.

    Args:
        params: Content block params accumulated so far for the current user message.

    Raises:
        UserError: If `params` is empty (there is no preceding block to attach the
            cache point to), or if the last block's `type` is not in the set of
            block types accepted here as cacheable.
    """
    if not params:
        raise UserError(
            'CachePoint cannot be the first content in a user message - there must be previous content to attach the CachePoint to. '
            'To cache system instructions or tool definitions, use the `anthropic_cache_instructions` or `anthropic_cache_tool_definitions` settings instead.'
        )

    # Only certain block types support cache_control.
    # See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#what-can-be-cached
    # 'document' is included so that a CachePoint placed directly after a PDF/document
    # block is accepted instead of being rejected.
    cacheable_types = {'text', 'tool_use', 'server_tool_use', 'image', 'tool_result', 'document'}

    # Cast needed because BetaContentBlockParam is a union including response Block types (Pydantic models)
    # that don't support dict operations, even though at runtime we only have request Param types (TypedDicts).
    last_param = cast(dict[str, Any], params[-1])
    if last_param['type'] not in cacheable_types:
        raise UserError(f'Cache control not supported for param type: {last_param["type"]}')

    # Mutate the existing block in place so the caller's list sees the marker.
    last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
710+
642711 @staticmethod
643712 async def _map_user_prompt (
644713 part : UserPromptPart ,
645- ) -> AsyncGenerator [BetaContentBlockParam ]:
714+ ) -> AsyncGenerator [BetaContentBlockParam | CachePoint ]:
646715 if isinstance (part .content , str ):
647716 if part .content : # Only yield non-empty text
648717 yield BetaTextBlockParam (text = part .content , type = 'text' )
@@ -651,6 +720,8 @@ async def _map_user_prompt(
651720 if isinstance (item , str ):
652721 if item : # Only yield non-empty text
653722 yield BetaTextBlockParam (text = item , type = 'text' )
723+ elif isinstance (item , CachePoint ):
724+ yield item
654725 elif isinstance (item , BinaryContent ):
655726 if item .is_image :
656727 yield BetaImageBlockParam (
@@ -717,6 +788,8 @@ def _map_usage(
717788 key : value for key , value in response_usage .model_dump ().items () if isinstance (value , int )
718789 }
719790
791+ # Note: genai-prices already extracts cache_creation_input_tokens and cache_read_input_tokens
792+ # from the Anthropic response and maps them to cache_write_tokens and cache_read_tokens
720793 return usage .RequestUsage .extract (
721794 dict (model = model , usage = details ),
722795 provider = provider ,
0 commit comments