
Commit dec2611

ronakrm, claude, and DouweM authored
Add Anthropic prompt caching support (#3363)
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Douwe Maan <douwe@pydantic.dev>
1 parent a28ec53 commit dec2611

File tree

15 files changed: +630, -42 lines changed

docs/models/anthropic.md

Lines changed: 67 additions & 0 deletions
@@ -77,3 +77,70 @@ model = AnthropicModel(
 agent = Agent(model)
 ...
 ```
+
+## Prompt Caching
+
+Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching:
+
+1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it
+2. **Cache System Instructions**: Enable the [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] [model setting](../agents.md#model-run-settings) to cache your system prompt
+3. **Cache Tool Definitions**: Enable the [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] [model setting](../agents.md#model-run-settings) to cache your tool definitions
+
+You can combine all three strategies for maximum savings:
+
+```python {test="skip"}
+from pydantic_ai import Agent, CachePoint, RunContext
+from pydantic_ai.models.anthropic import AnthropicModelSettings
+
+agent = Agent(
+    'anthropic:claude-sonnet-4-5',
+    system_prompt='Detailed instructions...',
+    model_settings=AnthropicModelSettings(
+        anthropic_cache_instructions=True,
+        anthropic_cache_tool_definitions=True,
+    ),
+)
+
+@agent.tool
+def search_docs(ctx: RunContext, query: str) -> str:
+    """Search documentation."""
+    return f'Results for {query}'
+
+async def main():
+    # First call - writes to cache
+    result1 = await agent.run([
+        'Long context from documentation...',
+        CachePoint(),
+        'First question'
+    ])
+
+    # Subsequent calls - read from cache (90% cost reduction)
+    result2 = await agent.run([
+        'Long context from documentation...',  # Same content
+        CachePoint(),
+        'Second question'
+    ])
+    print(f'First: {result1.output}')
+    print(f'Second: {result2.output}')
+```
+
+Access cache usage statistics via `result.usage()`:
+
+```python {test="skip"}
+from pydantic_ai import Agent
+from pydantic_ai.models.anthropic import AnthropicModelSettings
+
+agent = Agent(
+    'anthropic:claude-sonnet-4-5',
+    system_prompt='Instructions...',
+    model_settings=AnthropicModelSettings(
+        anthropic_cache_instructions=True
+    ),
+)
+
+async def main():
+    result = await agent.run('Your question')
+    usage = result.usage()
+    print(f'Cache write tokens: {usage.cache_write_tokens}')
+    print(f'Cache read tokens: {usage.cache_read_tokens}')
+```
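The docs above show a single `CachePoint`; several markers can also appear in one request, since each one simply attaches `cache_control` to the content before it (Anthropic's caching docs allow up to four cache breakpoints per request). A minimal sketch under those assumptions, reusing the model name from the examples above:

```python
from pydantic_ai import Agent, CachePoint

agent = Agent('anthropic:claude-sonnet-4-5')

async def main():
    # Two breakpoints: the shared corpus and the session context are cached
    # as separate prefixes, so changing only the question reuses both.
    result = await agent.run([
        'Shared reference corpus...',
        CachePoint(),
        'Session-specific context...',
        CachePoint(),
        'The actual question',
    ])
    print(result.output)
```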

pydantic_ai_slim/pydantic_ai/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -42,6 +42,7 @@
     BinaryImage,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    CachePoint,
     DocumentFormat,
     DocumentMediaType,
     DocumentUrl,
@@ -141,6 +142,7 @@
     'BinaryContent',
     'BuiltinToolCallPart',
     'BuiltinToolReturnPart',
+    'CachePoint',
     'DocumentFormat',
     'DocumentMediaType',
     'DocumentUrl',

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 20 additions & 1 deletion
@@ -612,8 +612,24 @@ def __init__(
             raise ValueError('`BinaryImage` must be have a media type that starts with "image/"')  # pragma: no cover
 
 
+@dataclass
+class CachePoint:
+    """A cache point marker for prompt caching.
+
+    Can be inserted into UserPromptPart.content to mark cache boundaries.
+    Models that don't support caching will filter these out.
+
+    Supported by:
+
+    - Anthropic
+    """
+
+    kind: Literal['cache-point'] = 'cache-point'
+    """Type identifier, this is available on all parts as a discriminator."""
+
+
 MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent
-UserContent: TypeAlias = str | MultiModalContent
+UserContent: TypeAlias = str | MultiModalContent | CachePoint
 
 
 @dataclass(repr=False)
@@ -730,6 +746,9 @@ def otel_message_parts(self, settings: InstrumentationSettings) -> list[_otel_me
                 if settings.include_content and settings.include_binary_content:
                     converted_part['content'] = base64.b64encode(part.data).decode()
                 parts.append(converted_part)
+            elif isinstance(part, CachePoint):
+                # CachePoint is a marker, not actual content - skip it for otel
+                pass
             else:
                 parts.append({'type': part.kind})  # pragma: no cover
         return parts
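Since `CachePoint` now participates in the `UserContent` union, it can be mixed freely with text and multimodal content. A small sketch (the image bytes are placeholders, not part of this commit):

```python
from pydantic_ai.messages import BinaryContent, CachePoint, UserContent

# CachePoint is a plain marker dataclass with a `kind` discriminator
assert CachePoint().kind == 'cache-point'

# It composes with the rest of the UserContent union, including after an
# image block (hypothetical truncated PNG bytes, just for illustration)
content: list[UserContent] = [
    'Long stable preamble...',
    BinaryContent(data=b'\x89PNG...', media_type='image/png'),
    CachePoint(),
    'Question about the image',
]
```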

pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 81 additions & 8 deletions
@@ -19,6 +19,7 @@
     BinaryContent,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    CachePoint,
     DocumentUrl,
     FilePart,
     FinishReason,
@@ -58,6 +59,7 @@
 from anthropic.types.beta import (
     BetaBase64PDFBlockParam,
     BetaBase64PDFSourceParam,
+    BetaCacheControlEphemeralParam,
     BetaCitationsDelta,
     BetaCodeExecutionTool20250522Param,
     BetaCodeExecutionToolResultBlock,
@@ -148,6 +150,22 @@ class AnthropicModelSettings(ModelSettings, total=False):
     See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information.
     """
 
+    anthropic_cache_tool_definitions: bool
+    """Whether to add `cache_control` to the last tool definition.
+
+    When enabled, the last tool in the `tools` array will have `cache_control` set,
+    allowing Anthropic to cache tool definitions and reduce costs.
+    See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
+    """
+
+    anthropic_cache_instructions: bool
+    """Whether to add `cache_control` to the last system prompt block.
+
+    When enabled, the last system prompt will have `cache_control` set,
+    allowing Anthropic to cache system instructions and reduce costs.
+    See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
+    """
+
 
 @dataclass(init=False)
 class AnthropicModel(Model):
@@ -289,7 +307,7 @@ async def _messages_create(
         model_request_parameters: ModelRequestParameters,
     ) -> BetaMessage | AsyncStream[BetaRawMessageStreamEvent]:
         # standalone function to make it easier to override
-        tools = self._get_tools(model_request_parameters)
+        tools = self._get_tools(model_request_parameters, model_settings)
         tools, mcp_servers, beta_features = self._add_builtin_tools(tools, model_request_parameters)
 
         tool_choice: BetaToolChoiceParam | None
@@ -305,7 +323,7 @@
         if (allow_parallel_tool_calls := model_settings.get('parallel_tool_calls')) is not None:
             tool_choice['disable_parallel_tool_use'] = not allow_parallel_tool_calls
 
-        system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters)
+        system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings)
 
         try:
             extra_headers = model_settings.get('extra_headers', {})
@@ -411,8 +429,19 @@ async def _process_streamed_response(
             _provider_url=self._provider.base_url,
         )
 
-    def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[BetaToolUnionParam]:
-        return [self._map_tool_definition(r) for r in model_request_parameters.tool_defs.values()]
+    def _get_tools(
+        self, model_request_parameters: ModelRequestParameters, model_settings: AnthropicModelSettings
+    ) -> list[BetaToolUnionParam]:
+        tools: list[BetaToolUnionParam] = [
+            self._map_tool_definition(r) for r in model_request_parameters.tool_defs.values()
+        ]
+
+        # Add cache_control to the last tool if enabled
+        if tools and model_settings.get('anthropic_cache_tool_definitions'):
+            last_tool = tools[-1]
+            last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
+
+        return tools
 
     def _add_builtin_tools(
         self, tools: list[BetaToolUnionParam], model_request_parameters: ModelRequestParameters
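For context on what `_get_tools` now emits: marking only the last tool follows Anthropic's documented caching behavior, where a cache breakpoint covers everything before it, so one marker caches the whole tool-definition block. A sketch of the resulting `tools` fragment (names and schemas are illustrative, not from this commit):

```python
tools = [
    {'name': 'search_docs', 'description': 'Search documentation.', 'input_schema': {'type': 'object'}},
    {
        'name': 'get_weather',
        'description': 'Look up the weather.',
        'input_schema': {'type': 'object'},
        # cache_control on the final tool caches the entire tools array prefix
        'cache_control': {'type': 'ephemeral'},
    },
]
```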
@@ -464,8 +493,11 @@ def _add_builtin_tools(
         return tools, mcp_servers, beta_features
 
     async def _map_message(  # noqa: C901
-        self, messages: list[ModelMessage], model_request_parameters: ModelRequestParameters
-    ) -> tuple[str, list[BetaMessageParam]]:
+        self,
+        messages: list[ModelMessage],
+        model_request_parameters: ModelRequestParameters,
+        model_settings: AnthropicModelSettings,
+    ) -> tuple[str | list[BetaTextBlockParam], list[BetaMessageParam]]:
         """Just maps a `pydantic_ai.Message` to a `anthropic.types.MessageParam`."""
         system_prompt_parts: list[str] = []
         anthropic_messages: list[BetaMessageParam] = []
@@ -477,7 +509,10 @@ async def _map_message(  # noqa: C901
                     system_prompt_parts.append(request_part.content)
                 elif isinstance(request_part, UserPromptPart):
                     async for content in self._map_user_prompt(request_part):
-                        user_content_params.append(content)
+                        if isinstance(content, CachePoint):
+                            self._add_cache_control_to_last_param(user_content_params)
+                        else:
+                            user_content_params.append(content)
                 elif isinstance(request_part, ToolReturnPart):
                     tool_result_block_param = BetaToolResultBlockParam(
                         tool_use_id=_guard_tool_call_id(t=request_part),
@@ -637,12 +672,46 @@
         if instructions := self._get_instructions(messages, model_request_parameters):
             system_prompt_parts.insert(0, instructions)
         system_prompt = '\n\n'.join(system_prompt_parts)
+
+        # If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control
+        if system_prompt and model_settings.get('anthropic_cache_instructions'):
+            system_prompt_blocks = [
+                BetaTextBlockParam(
+                    type='text', text=system_prompt, cache_control=BetaCacheControlEphemeralParam(type='ephemeral')
+                )
+            ]
+            return system_prompt_blocks, anthropic_messages
+
         return system_prompt, anthropic_messages
 
+    @staticmethod
+    def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None:
+        """Add cache control to the last content block param.
+
+        See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
+        """
+        if not params:
+            raise UserError(
+                'CachePoint cannot be the first content in a user message - there must be previous content to attach the CachePoint to. '
+                'To cache system instructions or tool definitions, use the `anthropic_cache_instructions` or `anthropic_cache_tool_definitions` settings instead.'
+            )
+
+        # Only certain types support cache_control
+        # See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#what-can-be-cached
+        cacheable_types = {'text', 'tool_use', 'server_tool_use', 'image', 'tool_result'}
+        # Cast needed because BetaContentBlockParam is a union including response Block types (Pydantic models)
+        # that don't support dict operations, even though at runtime we only have request Param types (TypedDicts).
+        last_param = cast(dict[str, Any], params[-1])
+        if last_param['type'] not in cacheable_types:
+            raise UserError(f'Cache control not supported for param type: {last_param["type"]}')
+
+        # Add cache_control to the last param
+        last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
+
     @staticmethod
     async def _map_user_prompt(
         part: UserPromptPart,
-    ) -> AsyncGenerator[BetaContentBlockParam]:
+    ) -> AsyncGenerator[BetaContentBlockParam | CachePoint]:
         if isinstance(part.content, str):
             if part.content:  # Only yield non-empty text
                 yield BetaTextBlockParam(text=part.content, type='text')
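Taken together, the two methods above mean a `CachePoint` never becomes a content block of its own; it mutates the block that precedes it. A sketch of the mapping (block dicts abbreviated):

```python
# User content ['Long context...', CachePoint(), 'Question'] maps to:
mapped_params = [
    {'type': 'text', 'text': 'Long context...', 'cache_control': {'type': 'ephemeral'}},
    {'type': 'text', 'text': 'Question'},
]
# With no preceding block, e.g. [CachePoint(), 'Question'], there is nothing
# to attach to, so _add_cache_control_to_last_param raises UserError instead.
```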
@@ -651,6 +720,8 @@ async def _map_user_prompt(
                 if isinstance(item, str):
                     if item:  # Only yield non-empty text
                         yield BetaTextBlockParam(text=item, type='text')
+                elif isinstance(item, CachePoint):
+                    yield item
                 elif isinstance(item, BinaryContent):
                     if item.is_image:
                         yield BetaImageBlockParam(
@@ -717,6 +788,8 @@
         key: value for key, value in response_usage.model_dump().items() if isinstance(value, int)
     }
 
+    # Note: genai-prices already extracts cache_creation_input_tokens and cache_read_input_tokens
+    # from the Anthropic response and maps them to cache_write_tokens and cache_read_tokens
     return usage.RequestUsage.extract(
         dict(model=model, usage=details),
         provider=provider,

pydantic_ai_slim/pydantic_ai/models/bedrock.py

Lines changed: 4 additions & 0 deletions
@@ -19,6 +19,7 @@
     BinaryContent,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    CachePoint,
     DocumentUrl,
     FinishReason,
     ImageUrl,
@@ -672,6 +673,9 @@ async def _map_user_prompt(part: UserPromptPart, document_count: Iterator[int])
                 content.append({'video': video})
             elif isinstance(item, AudioUrl):  # pragma: no cover
                 raise NotImplementedError('Audio is not supported yet.')
+            elif isinstance(item, CachePoint):
+                # Bedrock support has not been implemented yet: https://github.com/pydantic/pydantic-ai/issues/3418
+                pass
             else:
                 assert_never(item)
     return [{'role': 'user', 'content': content}]

pydantic_ai_slim/pydantic_ai/models/gemini.py

Lines changed: 4 additions & 0 deletions
@@ -21,6 +21,7 @@
     BinaryContent,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    CachePoint,
     FilePart,
     FileUrl,
     ModelMessage,
@@ -391,6 +392,9 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[_GeminiPartUnion]
                 else:  # pragma: lax no cover
                     file_data = _GeminiFileDataPart(file_data={'file_uri': item.url, 'mime_type': item.media_type})
                     content.append(file_data)
+            elif isinstance(item, CachePoint):
+                # Gemini doesn't support prompt caching via CachePoint
+                pass
             else:
                 assert_never(item)  # pragma: lax no cover
         return content

pydantic_ai_slim/pydantic_ai/models/google.py

Lines changed: 4 additions & 0 deletions
@@ -19,6 +19,7 @@
     BinaryContent,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    CachePoint,
     FilePart,
     FileUrl,
     FinishReason,
@@ -602,6 +603,9 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
                 else:
                     file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type}
                     content.append({'file_data': file_data_dict})  # pragma: lax no cover
+            elif isinstance(item, CachePoint):
+                # Google Gemini doesn't support prompt caching via CachePoint
+                pass
             else:
                 assert_never(item)
         return content

pydantic_ai_slim/pydantic_ai/models/huggingface.py

Lines changed: 4 additions & 0 deletions
@@ -18,6 +18,7 @@
     BinaryContent,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    CachePoint,
     DocumentUrl,
     FilePart,
     FinishReason,
@@ -447,6 +448,9 @@ async def _map_user_prompt(part: UserPromptPart) -> ChatCompletionInputMessage:
                 raise NotImplementedError('DocumentUrl is not supported for Hugging Face')
             elif isinstance(item, VideoUrl):
                 raise NotImplementedError('VideoUrl is not supported for Hugging Face')
+            elif isinstance(item, CachePoint):
+                # Hugging Face doesn't support prompt caching via CachePoint
+                pass
             else:
                 assert_never(item)
         return ChatCompletionInputMessage(role='user', content=content)  # type: ignore

0 commit comments