
Commit dec2611

ronakrm, claude, and DouweM authored
Add Anthropic prompt caching support (#3363)
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Douwe Maan <douwe@pydantic.dev>
1 parent a28ec53 commit dec2611

File tree

15 files changed: +630, -42 lines changed

docs/models/anthropic.md

Lines changed: 67 additions & 0 deletions
@@ -77,3 +77,70 @@ model = AnthropicModel(
 agent = Agent(model)
 ...
 ```
+
+## Prompt Caching
+
+Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching:
+
+1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it
+2. **Cache System Instructions**: Enable the [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] [model setting](../agents.md#model-run-settings) to cache your system prompt
+3. **Cache Tool Definitions**: Enable the [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] [model setting](../agents.md#model-run-settings) to cache your tool definitions
+
+You can combine all three strategies for maximum savings:
+
+```python {test="skip"}
+from pydantic_ai import Agent, CachePoint, RunContext
+from pydantic_ai.models.anthropic import AnthropicModelSettings
+
+agent = Agent(
+    'anthropic:claude-sonnet-4-5',
+    system_prompt='Detailed instructions...',
+    model_settings=AnthropicModelSettings(
+        anthropic_cache_instructions=True,
+        anthropic_cache_tool_definitions=True,
+    ),
+)
+
+@agent.tool
+def search_docs(ctx: RunContext, query: str) -> str:
+    """Search documentation."""
+    return f'Results for {query}'
+
+async def main():
+    # First call - writes to cache
+    result1 = await agent.run([
+        'Long context from documentation...',
+        CachePoint(),
+        'First question'
+    ])
+
+    # Subsequent calls - read from cache (90% cost reduction)
+    result2 = await agent.run([
+        'Long context from documentation...',  # Same content
+        CachePoint(),
+        'Second question'
+    ])
+    print(f'First: {result1.output}')
+    print(f'Second: {result2.output}')
+```
+
+Access cache usage statistics via `result.usage()`:
+
+```python {test="skip"}
+from pydantic_ai import Agent
+from pydantic_ai.models.anthropic import AnthropicModelSettings
+
+agent = Agent(
+    'anthropic:claude-sonnet-4-5',
+    system_prompt='Instructions...',
+    model_settings=AnthropicModelSettings(
+        anthropic_cache_instructions=True
+    ),
+)
+
+async def main():
+    result = await agent.run('Your question')
+    usage = result.usage()
+    print(f'Cache write tokens: {usage.cache_write_tokens}')
+    print(f'Cache read tokens: {usage.cache_read_tokens}')
+```
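The docs above show a single `CachePoint`; several markers can also appear in one request, since each one simply attaches `cache_control` to the content before it (Anthropic's caching docs allow up to four cache breakpoints per request). A minimal sketch under those assumptions, reusing the model name from the examples above:

```python
from pydantic_ai import Agent, CachePoint

agent = Agent('anthropic:claude-sonnet-4-5')

async def main():
    # Two breakpoints: the shared corpus and the session context are cached
    # as separate prefixes, so changing only the question reuses both.
    result = await agent.run([
        'Shared reference corpus...',
        CachePoint(),
        'Session-specific context...',
        CachePoint(),
        'The actual question',
    ])
    print(result.output)
```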

pydantic_ai_slim/pydantic_ai/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -42,6 +42,7 @@
     BinaryImage,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    CachePoint,
     DocumentFormat,
     DocumentMediaType,
     DocumentUrl,
@@ -141,6 +142,7 @@
     'BinaryContent',
     'BuiltinToolCallPart',
     'BuiltinToolReturnPart',
+    'CachePoint',
     'DocumentFormat',
     'DocumentMediaType',
     'DocumentUrl',

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 20 additions & 1 deletion
@@ -612,8 +612,24 @@ def __init__(
             raise ValueError('`BinaryImage` must be have a media type that starts with "image/"')  # pragma: no cover
 
 
+@dataclass
+class CachePoint:
+    """A cache point marker for prompt caching.
+
+    Can be inserted into UserPromptPart.content to mark cache boundaries.
+    Models that don't support caching will filter these out.
+
+    Supported by:
+
+    - Anthropic
+    """
+
+    kind: Literal['cache-point'] = 'cache-point'
+    """Type identifier, this is available on all parts as a discriminator."""
+
+
 MultiModalContent = ImageUrl | AudioUrl | DocumentUrl | VideoUrl | BinaryContent
-UserContent: TypeAlias = str | MultiModalContent
+UserContent: TypeAlias = str | MultiModalContent | CachePoint
 
 
 @dataclass(repr=False)
@@ -730,6 +746,9 @@ def otel_message_parts(self, settings: InstrumentationSettings) -> list[_otel_me
                 if settings.include_content and settings.include_binary_content:
                     converted_part['content'] = base64.b64encode(part.data).decode()
                 parts.append(converted_part)
+            elif isinstance(part, CachePoint):
+                # CachePoint is a marker, not actual content - skip it for otel
+                pass
             else:
                 parts.append({'type': part.kind})  # pragma: no cover
         return parts
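Since `CachePoint` now participates in the `UserContent` union, it can be mixed freely with text and multimodal content. A small sketch (the image bytes are placeholders, not part of this commit):

```python
from pydantic_ai.messages import BinaryContent, CachePoint, UserContent

# CachePoint is a plain marker dataclass with a `kind` discriminator
assert CachePoint().kind == 'cache-point'

# It composes with the rest of the UserContent union, including after an
# image block (hypothetical truncated PNG bytes, just for illustration)
content: list[UserContent] = [
    'Long stable preamble...',
    BinaryContent(data=b'\x89PNG...', media_type='image/png'),
    CachePoint(),
    'Question about the image',
]
```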

pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 81 additions & 8 deletions
@@ -19,6 +19,7 @@
     BinaryContent,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    CachePoint,
     DocumentUrl,
     FilePart,
     FinishReason,
@@ -58,6 +59,7 @@
 from anthropic.types.beta import (
     BetaBase64PDFBlockParam,
     BetaBase64PDFSourceParam,
+    BetaCacheControlEphemeralParam,
     BetaCitationsDelta,
     BetaCodeExecutionTool20250522Param,
     BetaCodeExecutionToolResultBlock,
@@ -148,6 +150,22 @@ class AnthropicModelSettings(ModelSettings, total=False):
     See [the Anthropic docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) for more information.
     """
 
+    anthropic_cache_tool_definitions: bool
+    """Whether to add `cache_control` to the last tool definition.
+
+    When enabled, the last tool in the `tools` array will have `cache_control` set,
+    allowing Anthropic to cache tool definitions and reduce costs.
+    See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
+    """
+
+    anthropic_cache_instructions: bool
+    """Whether to add `cache_control` to the last system prompt block.
+
+    When enabled, the last system prompt will have `cache_control` set,
+    allowing Anthropic to cache system instructions and reduce costs.
+    See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
+    """
+
 
 @dataclass(init=False)
 class AnthropicModel(Model):
@@ -289,7 +307,7 @@ async def _messages_create(
         model_request_parameters: ModelRequestParameters,
     ) -> BetaMessage | AsyncStream[BetaRawMessageStreamEvent]:
         # standalone function to make it easier to override
-        tools = self._get_tools(model_request_parameters)
+        tools = self._get_tools(model_request_parameters, model_settings)
         tools, mcp_servers, beta_features = self._add_builtin_tools(tools, model_request_parameters)
 
         tool_choice: BetaToolChoiceParam | None
@@ -305,7 +323,7 @@
         if (allow_parallel_tool_calls := model_settings.get('parallel_tool_calls')) is not None:
             tool_choice['disable_parallel_tool_use'] = not allow_parallel_tool_calls
 
-        system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters)
+        system_prompt, anthropic_messages = await self._map_message(messages, model_request_parameters, model_settings)
 
         try:
             extra_headers = model_settings.get('extra_headers', {})
@@ -411,8 +429,19 @@ async def _process_streamed_response(
             _provider_url=self._provider.base_url,
         )
 
-    def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[BetaToolUnionParam]:
-        return [self._map_tool_definition(r) for r in model_request_parameters.tool_defs.values()]
+    def _get_tools(
+        self, model_request_parameters: ModelRequestParameters, model_settings: AnthropicModelSettings
+    ) -> list[BetaToolUnionParam]:
+        tools: list[BetaToolUnionParam] = [
+            self._map_tool_definition(r) for r in model_request_parameters.tool_defs.values()
+        ]
+
+        # Add cache_control to the last tool if enabled
+        if tools and model_settings.get('anthropic_cache_tool_definitions'):
+            last_tool = tools[-1]
+            last_tool['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
+
+        return tools
 
     def _add_builtin_tools(
         self, tools: list[BetaToolUnionParam], model_request_parameters: ModelRequestParameters
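For context on what `_get_tools` now emits: marking only the last tool follows Anthropic's documented caching behavior, where a cache breakpoint covers everything before it, so one marker caches the whole tool-definition block. A sketch of the resulting `tools` fragment (names and schemas are illustrative, not from this commit):

```python
tools = [
    {'name': 'search_docs', 'description': 'Search documentation.', 'input_schema': {'type': 'object'}},
    {
        'name': 'get_weather',
        'description': 'Look up the weather.',
        'input_schema': {'type': 'object'},
        # cache_control on the final tool caches the entire tools array prefix
        'cache_control': {'type': 'ephemeral'},
    },
]
```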
@@ -464,8 +493,11 @@ def _add_builtin_tools(
         return tools, mcp_servers, beta_features
 
     async def _map_message(  # noqa: C901
-        self, messages: list[ModelMessage], model_request_parameters: ModelRequestParameters
-    ) -> tuple[str, list[BetaMessageParam]]:
+        self,
+        messages: list[ModelMessage],
+        model_request_parameters: ModelRequestParameters,
+        model_settings: AnthropicModelSettings,
+    ) -> tuple[str | list[BetaTextBlockParam], list[BetaMessageParam]]:
         """Just maps a `pydantic_ai.Message` to a `anthropic.types.MessageParam`."""
         system_prompt_parts: list[str] = []
         anthropic_messages: list[BetaMessageParam] = []
@@ -477,7 +509,10 @@ async def _map_message(  # noqa: C901
                     system_prompt_parts.append(request_part.content)
                 elif isinstance(request_part, UserPromptPart):
                     async for content in self._map_user_prompt(request_part):
-                        user_content_params.append(content)
+                        if isinstance(content, CachePoint):
+                            self._add_cache_control_to_last_param(user_content_params)
+                        else:
+                            user_content_params.append(content)
                 elif isinstance(request_part, ToolReturnPart):
                     tool_result_block_param = BetaToolResultBlockParam(
                         tool_use_id=_guard_tool_call_id(t=request_part),
@@ -637,12 +672,46 @@
         if instructions := self._get_instructions(messages, model_request_parameters):
             system_prompt_parts.insert(0, instructions)
         system_prompt = '\n\n'.join(system_prompt_parts)
+
+        # If anthropic_cache_instructions is enabled, return system prompt as a list with cache_control
+        if system_prompt and model_settings.get('anthropic_cache_instructions'):
+            system_prompt_blocks = [
+                BetaTextBlockParam(
+                    type='text', text=system_prompt, cache_control=BetaCacheControlEphemeralParam(type='ephemeral')
+                )
+            ]
+            return system_prompt_blocks, anthropic_messages
+
         return system_prompt, anthropic_messages
 
+    @staticmethod
+    def _add_cache_control_to_last_param(params: list[BetaContentBlockParam]) -> None:
+        """Add cache control to the last content block param.
+
+        See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching for more information.
+        """
+        if not params:
+            raise UserError(
+                'CachePoint cannot be the first content in a user message - there must be previous content to attach the CachePoint to. '
+                'To cache system instructions or tool definitions, use the `anthropic_cache_instructions` or `anthropic_cache_tool_definitions` settings instead.'
+            )
+
+        # Only certain types support cache_control
+        # See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#what-can-be-cached
+        cacheable_types = {'text', 'tool_use', 'server_tool_use', 'image', 'tool_result'}
+        # Cast needed because BetaContentBlockParam is a union including response Block types (Pydantic models)
+        # that don't support dict operations, even though at runtime we only have request Param types (TypedDicts).
+        last_param = cast(dict[str, Any], params[-1])
+        if last_param['type'] not in cacheable_types:
+            raise UserError(f'Cache control not supported for param type: {last_param["type"]}')
+
+        # Add cache_control to the last param
+        last_param['cache_control'] = BetaCacheControlEphemeralParam(type='ephemeral')
+
     @staticmethod
     async def _map_user_prompt(
         part: UserPromptPart,
-    ) -> AsyncGenerator[BetaContentBlockParam]:
+    ) -> AsyncGenerator[BetaContentBlockParam | CachePoint]:
         if isinstance(part.content, str):
             if part.content:  # Only yield non-empty text
                 yield BetaTextBlockParam(text=part.content, type='text')
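Taken together, the two methods above mean a `CachePoint` never becomes a content block of its own; it mutates the block that precedes it. A sketch of the mapping (block dicts abbreviated):

```python
# User content ['Long context...', CachePoint(), 'Question'] maps to:
mapped_params = [
    {'type': 'text', 'text': 'Long context...', 'cache_control': {'type': 'ephemeral'}},
    {'type': 'text', 'text': 'Question'},
]
# With no preceding block, e.g. [CachePoint(), 'Question'], there is nothing
# to attach to, so _add_cache_control_to_last_param raises UserError instead.
```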
@@ -651,6 +720,8 @@ async def _map_user_prompt(
                 if isinstance(item, str):
                     if item:  # Only yield non-empty text
                         yield BetaTextBlockParam(text=item, type='text')
+                elif isinstance(item, CachePoint):
+                    yield item
                 elif isinstance(item, BinaryContent):
                     if item.is_image:
                         yield BetaImageBlockParam(
@@ -717,6 +788,8 @@
         key: value for key, value in response_usage.model_dump().items() if isinstance(value, int)
     }
 
+    # Note: genai-prices already extracts cache_creation_input_tokens and cache_read_input_tokens
+    # from the Anthropic response and maps them to cache_write_tokens and cache_read_tokens
     return usage.RequestUsage.extract(
         dict(model=model, usage=details),
         provider=provider,

pydantic_ai_slim/pydantic_ai/models/bedrock.py

Lines changed: 4 additions & 0 deletions
@@ -19,6 +19,7 @@
     BinaryContent,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    CachePoint,
     DocumentUrl,
     FinishReason,
     ImageUrl,
@@ -672,6 +673,9 @@ async def _map_user_prompt(part: UserPromptPart, document_count: Iterator[int])
                 content.append({'video': video})
             elif isinstance(item, AudioUrl):  # pragma: no cover
                 raise NotImplementedError('Audio is not supported yet.')
+            elif isinstance(item, CachePoint):
+                # Bedrock support has not been implemented yet: https://github.com/pydantic/pydantic-ai/issues/3418
+                pass
             else:
                 assert_never(item)
     return [{'role': 'user', 'content': content}]

pydantic_ai_slim/pydantic_ai/models/gemini.py

Lines changed: 4 additions & 0 deletions
@@ -21,6 +21,7 @@
     BinaryContent,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    CachePoint,
     FilePart,
     FileUrl,
     ModelMessage,
@@ -391,6 +392,9 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[_GeminiPartUnion]
                 else:  # pragma: lax no cover
                     file_data = _GeminiFileDataPart(file_data={'file_uri': item.url, 'mime_type': item.media_type})
                     content.append(file_data)
+            elif isinstance(item, CachePoint):
+                # Gemini doesn't support prompt caching via CachePoint
+                pass
             else:
                 assert_never(item)  # pragma: lax no cover
         return content

pydantic_ai_slim/pydantic_ai/models/google.py

Lines changed: 4 additions & 0 deletions
@@ -19,6 +19,7 @@
     BinaryContent,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    CachePoint,
     FilePart,
     FileUrl,
     FinishReason,
@@ -602,6 +603,9 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
                 else:
                     file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type}
                     content.append({'file_data': file_data_dict})  # pragma: lax no cover
+            elif isinstance(item, CachePoint):
+                # Google Gemini doesn't support prompt caching via CachePoint
+                pass
             else:
                 assert_never(item)
         return content

pydantic_ai_slim/pydantic_ai/models/huggingface.py

Lines changed: 4 additions & 0 deletions
@@ -18,6 +18,7 @@
     BinaryContent,
     BuiltinToolCallPart,
     BuiltinToolReturnPart,
+    CachePoint,
     DocumentUrl,
     FilePart,
     FinishReason,
@@ -447,6 +448,9 @@ async def _map_user_prompt(part: UserPromptPart) -> ChatCompletionInputMessage:
                 raise NotImplementedError('DocumentUrl is not supported for Hugging Face')
             elif isinstance(item, VideoUrl):
                 raise NotImplementedError('VideoUrl is not supported for Hugging Face')
+            elif isinstance(item, CachePoint):
+                # Hugging Face doesn't support prompt caching via CachePoint
+                pass
             else:
                 assert_never(item)
         return ChatCompletionInputMessage(role='user', content=content)  # type: ignore

0 commit comments