diff --git a/packages/cdk/lambda-python/generic-agent-core-runtime/src/agent.py b/packages/cdk/lambda-python/generic-agent-core-runtime/src/agent.py index fb3756bd2..b63fbae46 100644 --- a/packages/cdk/lambda-python/generic-agent-core-runtime/src/agent.py +++ b/packages/cdk/lambda-python/generic-agent-core-runtime/src/agent.py @@ -9,7 +9,7 @@ from strands import Agent as StrandsAgent from strands.models import BedrockModel -from .config import extract_model_info, get_max_iterations, get_system_prompt +from .config import extract_model_info, get_max_iterations, get_system_prompt, supports_prompt_cache, supports_tools_cache from .tools import ToolManager from .types import Message, ModelInfo from .utils import ( @@ -81,12 +81,21 @@ async def process_request_streaming( # Create boto3 session and Bedrock model session = boto3.Session(region_name=region) - bedrock_model = BedrockModel( - model_id=model_id, - boto_session=session, - cache_prompt="default", - cache_tools="default", - ) + + # Configure caching based on model support (loaded from environment variable) + bedrock_model_params = { + "model_id": model_id, + "boto_session": session, + } + + # Only enable caching for officially supported models + if supports_prompt_cache(model_id): + bedrock_model_params["cache_prompt"] = "default" + + if supports_tools_cache(model_id): + bedrock_model_params["cache_tools"] = "default" + + bedrock_model = BedrockModel(**bedrock_model_params) # Process messages and prompt using utility functions processed_messages = process_messages(messages) diff --git a/packages/cdk/lambda-python/generic-agent-core-runtime/src/config.py b/packages/cdk/lambda-python/generic-agent-core-runtime/src/config.py index 4d546c3b4..594f9d1bb 100644 --- a/packages/cdk/lambda-python/generic-agent-core-runtime/src/config.py +++ b/packages/cdk/lambda-python/generic-agent-core-runtime/src/config.py @@ -1,7 +1,9 @@ """Configuration and environment setup for the agent core runtime.""" +import json import logging import os +import re from typing import Any # Configure root logger @@ -82,3 +84,34 @@ def get_max_iterations() -> int: except ValueError: logger.warning(f"Invalid MAX_ITERATIONS value. Defaulting to {DEFAULT_MAX_ITERATIONS}.") return DEFAULT_MAX_ITERATIONS + + +# CRI (Cross-Region Inference) prefix pattern +CRI_PREFIX_PATTERN = re.compile(r"^(global|us|eu|apac|jp)\.") + +# Prompt caching configuration +# Based on: https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html +# Load from environment variable (injected by CDK from TypeScript definition) +_supported_cache_fields_env = os.environ.get("SUPPORTED_CACHE_FIELDS") +if _supported_cache_fields_env: + SUPPORTED_CACHE_FIELDS: dict[str, list[str]] = json.loads(_supported_cache_fields_env) +else: + # Fallback if environment variable is not set (should not happen in production) + logger.warning("SUPPORTED_CACHE_FIELDS not found in environment, using empty fallback") + SUPPORTED_CACHE_FIELDS: dict[str, list[str]] = {} + + +def get_supported_cache_fields(model_id: str) -> list[str]: + """Get supported cache fields for a model (removes CRI prefix before lookup)""" + base_model_id = CRI_PREFIX_PATTERN.sub("", model_id) + return SUPPORTED_CACHE_FIELDS.get(base_model_id, []) + + +def supports_prompt_cache(model_id: str) -> bool: + """Check if a model supports prompt caching (system or messages)""" + return len(get_supported_cache_fields(model_id)) > 0 + + +def supports_tools_cache(model_id: str) -> bool: + """Check if a model supports tools caching""" + return "tools" in get_supported_cache_fields(model_id) diff --git a/packages/cdk/lib/construct/generic-agent-core.ts b/packages/cdk/lib/construct/generic-agent-core.ts index d6043ea54..8077d5b61 100644 --- a/packages/cdk/lib/construct/generic-agent-core.ts +++ b/packages/cdk/lib/construct/generic-agent-core.ts @@ -20,6 +20,7 @@ import { import { BucketInfo } from 'generative-ai-use-cases'; import * as path from 'path'; import { loadMCPConfig } from '../utils/mcp-config-loader'; +import { SUPPORTED_CACHE_FIELDS } from '@generative-ai-use-cases/common'; export interface AgentCoreRuntimeConfig { name: string; @@ -102,6 +103,7 @@ export class GenericAgentCore extends Construct { environmentVariables: { FILE_BUCKET: bucketName, MCP_SERVERS: JSON.stringify(genericMcpServers), + SUPPORTED_CACHE_FIELDS: JSON.stringify(SUPPORTED_CACHE_FIELDS), }, }, agentBuilder: { @@ -115,6 +117,7 @@ export class GenericAgentCore extends Construct { environmentVariables: { FILE_BUCKET: bucketName, MCP_SERVERS: JSON.stringify(agentBuilderMcpServers), + SUPPORTED_CACHE_FIELDS: JSON.stringify(SUPPORTED_CACHE_FIELDS), }, }, }; diff --git a/packages/cdk/test/__snapshots__/generative-ai-use-cases.test.ts.snap b/packages/cdk/test/__snapshots__/generative-ai-use-cases.test.ts.snap index f2588e9e3..a72b75db4 100644 --- a/packages/cdk/test/__snapshots__/generative-ai-use-cases.test.ts.snap +++ b/packages/cdk/test/__snapshots__/generative-ai-use-cases.test.ts.snap @@ -4526,7 +4526,7 @@ exports[`GenerativeAiUseCases matches the snapshot (closed network mode) 4`] = ` "AgentRuntimeArtifact": { "ContainerConfiguration": { "ContainerUri": { - "Fn::Sub": "123456890123.dkr.ecr.us-east-1.\${AWS::URLSuffix}/cdk-hnb659fds-container-assets-123456890123-us-east-1:8c5a41bdaf8957310132e22107e4c5d5573580cf47ebe99a8c257159a5ba076e", + "Fn::Sub": "123456890123.dkr.ecr.us-east-1.\${AWS::URLSuffix}/cdk-hnb659fds-container-assets-123456890123-us-east-1:69eb434ee242daa7224dd9fb0f5cf9142a71fe81dac805352a11509e5ecff870", }, }, }, @@ -4536,6 +4536,7 @@ exports[`GenerativeAiUseCases matches the snapshot (closed network mode) 4`] = ` "Ref": "GenericAgentCoreAgentCoreFileBucket0430DA42", }, "MCP_SERVERS": "{"time":{"command":"uvx","args":["mcp-server-time"],"metadata":{"category":"Utility","description":"Provides current time and date functionality"}},"aws-knowledge-mcp-server":{"command":"npx","args":["mcp-remote","https://knowledge-mcp.global.api.aws"],"metadata":{"category":"AWS","description":"AWS Knowledge Base MCP server for enterprise knowledge access"}},"awslabs.aws-documentation-mcp-server":{"command":"uvx","args":["awslabs.aws-documentation-mcp-server@latest"],"metadata":{"category":"AWS","description":"Access AWS documentation and guides"}},"awslabs.cdk-mcp-server":{"command":"uvx","args":["awslabs.cdk-mcp-server@latest"],"metadata":{"category":"AWS","description":"AWS CDK code generation and assistance"}},"awslabs.aws-diagram-mcp-server":{"command":"uvx","args":["awslabs.aws-diagram-mcp-server@latest"],"metadata":{"category":"AWS","description":"Generate AWS architecture diagrams"}},"awslabs.nova-canvas-mcp-server":{"command":"uvx","args":["awslabs.nova-canvas-mcp-server@latest"],"env":{"AWS_REGION":"us-east-1"},"metadata":{"category":"AI/ML","description":"Amazon Nova Canvas image generation"}},"tavily-search":{"command":"npx","args":["-y","mcp-remote","https://mcp.tavily.com/mcp/?tavilyApiKey="],"metadata":{"category":"Search","description":"Web search and research capabilities powered by Tavily"}}}", + "SUPPORTED_CACHE_FIELDS": "{"anthropic.claude-sonnet-4-5-20250929-v1:0":["messages","system","tools"],"anthropic.claude-haiku-4-5-20251001-v1:0":["messages","system","tools"],"anthropic.claude-opus-4-1-20250805-v1:0":["messages","system","tools"],"anthropic.claude-opus-4-20250514-v1:0":["messages","system","tools"],"anthropic.claude-sonnet-4-20250514-v1:0":["messages","system","tools"],"anthropic.claude-3-7-sonnet-20250219-v1:0":["messages","system","tools"],"anthropic.claude-3-5-haiku-20241022-v1:0":["messages","system","tools"],"amazon.nova-premier-v1:0":["messages","system"],"amazon.nova-pro-v1:0":["messages","system"],"amazon.nova-lite-v1:0":["messages","system"],"amazon.nova-micro-v1:0":["messages","system"]}", }, "NetworkConfiguration": { "NetworkMode": "PUBLIC", @@ -4794,7 +4795,7 @@ exports[`GenerativeAiUseCases matches the snapshot (closed network mode) 4`] = ` "AgentRuntimeArtifact": { "ContainerConfiguration": { "ContainerUri": { - "Fn::Sub": "123456890123.dkr.ecr.us-east-1.\${AWS::URLSuffix}/cdk-hnb659fds-container-assets-123456890123-us-east-1:8c5a41bdaf8957310132e22107e4c5d5573580cf47ebe99a8c257159a5ba076e", + "Fn::Sub": "123456890123.dkr.ecr.us-east-1.\${AWS::URLSuffix}/cdk-hnb659fds-container-assets-123456890123-us-east-1:69eb434ee242daa7224dd9fb0f5cf9142a71fe81dac805352a11509e5ecff870", }, }, }, @@ -4804,6 +4805,7 @@ exports[`GenerativeAiUseCases matches the snapshot (closed network mode) 4`] = ` "Ref": "GenericAgentCoreAgentCoreFileBucket0430DA42", }, "MCP_SERVERS": "{"time":{"command":"uvx","args":["mcp-server-time"],"metadata":{"category":"Utility","description":"Provides current time and date functionality"}},"aws-knowledge-mcp-server":{"command":"npx","args":["mcp-remote","https://knowledge-mcp.global.api.aws"],"metadata":{"category":"AWS","description":"AWS Knowledge Base MCP server for enterprise knowledge access"}},"awslabs.aws-documentation-mcp-server":{"command":"uvx","args":["awslabs.aws-documentation-mcp-server@latest"],"metadata":{"category":"AWS","description":"Access AWS documentation and guides"}},"awslabs.cdk-mcp-server":{"command":"uvx","args":["awslabs.cdk-mcp-server@latest"],"metadata":{"category":"AWS","description":"AWS CDK code generation and assistance"}},"awslabs.aws-diagram-mcp-server":{"command":"uvx","args":["awslabs.aws-diagram-mcp-server@latest"],"metadata":{"category":"AWS","description":"Generate AWS architecture diagrams"}},"awslabs.nova-canvas-mcp-server":{"command":"uvx","args":["awslabs.nova-canvas-mcp-server@latest"],"env":{"AWS_REGION":"us-east-1"},"metadata":{"category":"AI/ML","description":"Amazon Nova Canvas image generation"}},"tavily-search":{"command":"npx","args":["-y","mcp-remote","https://mcp.tavily.com/mcp/?tavilyApiKey="],"metadata":{"category":"Search","description":"Web search and research capabilities powered by Tavily"}}}", + "SUPPORTED_CACHE_FIELDS": "{"anthropic.claude-sonnet-4-5-20250929-v1:0":["messages","system","tools"],"anthropic.claude-haiku-4-5-20251001-v1:0":["messages","system","tools"],"anthropic.claude-opus-4-1-20250805-v1:0":["messages","system","tools"],"anthropic.claude-opus-4-20250514-v1:0":["messages","system","tools"],"anthropic.claude-sonnet-4-20250514-v1:0":["messages","system","tools"],"anthropic.claude-3-7-sonnet-20250219-v1:0":["messages","system","tools"],"anthropic.claude-3-5-haiku-20241022-v1:0":["messages","system","tools"],"amazon.nova-premier-v1:0":["messages","system"],"amazon.nova-pro-v1:0":["messages","system"],"amazon.nova-lite-v1:0":["messages","system"],"amazon.nova-micro-v1:0":["messages","system"]}", }, "NetworkConfiguration": { "NetworkMode": "PUBLIC", @@ -26482,7 +26484,7 @@ exports[`GenerativeAiUseCases matches the snapshot 4`] = ` "AgentRuntimeArtifact": { "ContainerConfiguration": { "ContainerUri": { - "Fn::Sub": "123456890123.dkr.ecr.us-east-1.\${AWS::URLSuffix}/cdk-hnb659fds-container-assets-123456890123-us-east-1:8c5a41bdaf8957310132e22107e4c5d5573580cf47ebe99a8c257159a5ba076e", + "Fn::Sub": "123456890123.dkr.ecr.us-east-1.\${AWS::URLSuffix}/cdk-hnb659fds-container-assets-123456890123-us-east-1:69eb434ee242daa7224dd9fb0f5cf9142a71fe81dac805352a11509e5ecff870", }, }, }, @@ -26492,6 +26494,7 @@ exports[`GenerativeAiUseCases matches the snapshot 4`] = ` "Ref": "GenericAgentCoreAgentCoreFileBucket0430DA42", }, "MCP_SERVERS": "{"time":{"command":"uvx","args":["mcp-server-time"],"metadata":{"category":"Utility","description":"Provides current time and date functionality"}},"aws-knowledge-mcp-server":{"command":"npx","args":["mcp-remote","https://knowledge-mcp.global.api.aws"],"metadata":{"category":"AWS","description":"AWS Knowledge Base MCP server for enterprise knowledge access"}},"awslabs.aws-documentation-mcp-server":{"command":"uvx","args":["awslabs.aws-documentation-mcp-server@latest"],"metadata":{"category":"AWS","description":"Access AWS documentation and guides"}},"awslabs.cdk-mcp-server":{"command":"uvx","args":["awslabs.cdk-mcp-server@latest"],"metadata":{"category":"AWS","description":"AWS CDK code generation and assistance"}},"awslabs.aws-diagram-mcp-server":{"command":"uvx","args":["awslabs.aws-diagram-mcp-server@latest"],"metadata":{"category":"AWS","description":"Generate AWS architecture diagrams"}},"awslabs.nova-canvas-mcp-server":{"command":"uvx","args":["awslabs.nova-canvas-mcp-server@latest"],"env":{"AWS_REGION":"us-east-1"},"metadata":{"category":"AI/ML","description":"Amazon Nova Canvas image generation"}},"tavily-search":{"command":"npx","args":["-y","mcp-remote","https://mcp.tavily.com/mcp/?tavilyApiKey="],"metadata":{"category":"Search","description":"Web search and research capabilities powered by Tavily"}}}", + "SUPPORTED_CACHE_FIELDS": "{"anthropic.claude-sonnet-4-5-20250929-v1:0":["messages","system","tools"],"anthropic.claude-haiku-4-5-20251001-v1:0":["messages","system","tools"],"anthropic.claude-opus-4-1-20250805-v1:0":["messages","system","tools"],"anthropic.claude-opus-4-20250514-v1:0":["messages","system","tools"],"anthropic.claude-sonnet-4-20250514-v1:0":["messages","system","tools"],"anthropic.claude-3-7-sonnet-20250219-v1:0":["messages","system","tools"],"anthropic.claude-3-5-haiku-20241022-v1:0":["messages","system","tools"],"amazon.nova-premier-v1:0":["messages","system"],"amazon.nova-pro-v1:0":["messages","system"],"amazon.nova-lite-v1:0":["messages","system"],"amazon.nova-micro-v1:0":["messages","system"]}", }, "NetworkConfiguration": { "NetworkMode": "PUBLIC", @@ -26750,7 +26753,7 @@ exports[`GenerativeAiUseCases matches the snapshot 4`] = ` "AgentRuntimeArtifact": { "ContainerConfiguration": { "ContainerUri": { - "Fn::Sub": "123456890123.dkr.ecr.us-east-1.\${AWS::URLSuffix}/cdk-hnb659fds-container-assets-123456890123-us-east-1:8c5a41bdaf8957310132e22107e4c5d5573580cf47ebe99a8c257159a5ba076e", + "Fn::Sub": "123456890123.dkr.ecr.us-east-1.\${AWS::URLSuffix}/cdk-hnb659fds-container-assets-123456890123-us-east-1:69eb434ee242daa7224dd9fb0f5cf9142a71fe81dac805352a11509e5ecff870", }, }, }, @@ -26760,6 +26763,7 @@ exports[`GenerativeAiUseCases matches the snapshot 4`] = ` "Ref": "GenericAgentCoreAgentCoreFileBucket0430DA42", }, "MCP_SERVERS": "{"time":{"command":"uvx","args":["mcp-server-time"],"metadata":{"category":"Utility","description":"Provides current time and date functionality"}},"aws-knowledge-mcp-server":{"command":"npx","args":["mcp-remote","https://knowledge-mcp.global.api.aws"],"metadata":{"category":"AWS","description":"AWS Knowledge Base MCP server for enterprise knowledge access"}},"awslabs.aws-documentation-mcp-server":{"command":"uvx","args":["awslabs.aws-documentation-mcp-server@latest"],"metadata":{"category":"AWS","description":"Access AWS documentation and guides"}},"awslabs.cdk-mcp-server":{"command":"uvx","args":["awslabs.cdk-mcp-server@latest"],"metadata":{"category":"AWS","description":"AWS CDK code generation and assistance"}},"awslabs.aws-diagram-mcp-server":{"command":"uvx","args":["awslabs.aws-diagram-mcp-server@latest"],"metadata":{"category":"AWS","description":"Generate AWS architecture diagrams"}},"awslabs.nova-canvas-mcp-server":{"command":"uvx","args":["awslabs.nova-canvas-mcp-server@latest"],"env":{"AWS_REGION":"us-east-1"},"metadata":{"category":"AI/ML","description":"Amazon Nova Canvas image generation"}},"tavily-search":{"command":"npx","args":["-y","mcp-remote","https://mcp.tavily.com/mcp/?tavilyApiKey="],"metadata":{"category":"Search","description":"Web search and research capabilities powered by Tavily"}}}", + "SUPPORTED_CACHE_FIELDS": "{"anthropic.claude-sonnet-4-5-20250929-v1:0":["messages","system","tools"],"anthropic.claude-haiku-4-5-20251001-v1:0":["messages","system","tools"],"anthropic.claude-opus-4-1-20250805-v1:0":["messages","system","tools"],"anthropic.claude-opus-4-20250514-v1:0":["messages","system","tools"],"anthropic.claude-sonnet-4-20250514-v1:0":["messages","system","tools"],"anthropic.claude-3-7-sonnet-20250219-v1:0":["messages","system","tools"],"anthropic.claude-3-5-haiku-20241022-v1:0":["messages","system","tools"],"amazon.nova-premier-v1:0":["messages","system"],"amazon.nova-pro-v1:0":["messages","system"],"amazon.nova-lite-v1:0":["messages","system"],"amazon.nova-micro-v1:0":["messages","system"]}", }, "NetworkConfiguration": { "NetworkMode": "PUBLIC",