From ac52f8c7736479587e3d95d9e468ab578d681c15 Mon Sep 17 00:00:00 2001 From: Ro-ee Tal Date: Thu, 28 Aug 2025 17:15:29 -0400 Subject: [PATCH] feat: pruning conversation manager docs --- docs/examples/python/conversation_pruning.md | 398 ++++++++++++++++++ docs/examples/python/conversation_pruning.py | 339 +++++++++++++++ .../agents/conversation-management.md | 50 +++ .../concepts/agents/conversation-pruning.md | 391 +++++++++++++++++ mkdocs.yml | 2 + 5 files changed, 1180 insertions(+) create mode 100644 docs/examples/python/conversation_pruning.md create mode 100644 docs/examples/python/conversation_pruning.py create mode 100644 docs/user-guide/concepts/agents/conversation-pruning.md diff --git a/docs/examples/python/conversation_pruning.md b/docs/examples/python/conversation_pruning.md new file mode 100644 index 00000000..471f9c79 --- /dev/null +++ b/docs/examples/python/conversation_pruning.md @@ -0,0 +1,398 @@ +# Conversation Pruning + +This guide demonstrates how to use the `PruningConversationManager` to selectively manage conversation history through intelligent pruning strategies. 
+ +## Overview + +The `PruningConversationManager` provides a flexible approach to conversation management that: + +- **Preserves Structure**: Unlike summarization, pruning maintains the conversation's message structure +- **Uses Strategies**: Employs pluggable strategies to determine what and how to prune +- **Selective Preservation**: Keeps important messages (initial and recent) while pruning middle content +- **Proactive Management**: Can automatically prune when approaching token limits +- **Context-Aware**: Makes intelligent decisions based on message content and context + +## Basic Usage + +### Simple Tool Result Pruning + +The most common use case is compressing large tool results that can consume significant context space: + +```python +from strands import Agent +from strands.agent.conversation_manager import PruningConversationManager +from strands.agent.conversation_manager.strategies import LargeToolResultPruningStrategy + +# Create a strategy to compress large tool results +tool_result_strategy = LargeToolResultPruningStrategy( + max_tool_result_tokens=10_000, # Compress results larger than 10k tokens + compression_template="[Tool result compressed: {original_size} → {compressed_size} tokens. 
Status: {status}]" +) + +# Create the pruning manager +conversation_manager = PruningConversationManager( + pruning_strategies=[tool_result_strategy], + preserve_recent_messages=3, # Keep 3 most recent messages + preserve_initial_messages=1, # Keep 1 initial message + enable_proactive_pruning=True, # Enable automatic pruning + pruning_threshold=0.8, # Prune when 80% of context is used + context_window_size=100_000 # 100k token context window +) + +agent = Agent( + conversation_manager=conversation_manager +) + +# The agent will now automatically compress large tool results +# and proactively prune when the conversation grows too large +``` + +### Multiple Pruning Strategies + +You can combine multiple strategies for comprehensive pruning: + +```python +from strands import Agent +from strands.agent.conversation_manager import PruningConversationManager +from strands.agent.conversation_manager.strategies import LargeToolResultPruningStrategy + +# Strategy 1: Compress large tool results +tool_result_strategy = LargeToolResultPruningStrategy( + max_tool_result_tokens=5_000 +) + +# You can create custom strategies by implementing the PruningStrategy interface +class OldMessagePruningStrategy: + """Custom strategy to remove very old messages.""" + + def should_prune_message(self, message, context): + # Prune messages that are more than 20 messages old + return context["message_index"] < context["total_messages"] - 20 + + def prune_message(self, message, agent): + # Remove the message entirely + return None + + def get_strategy_name(self): + return "OldMessagePruningStrategy" + +old_message_strategy = OldMessagePruningStrategy() + +# Combine strategies +conversation_manager = PruningConversationManager( + pruning_strategies=[tool_result_strategy, old_message_strategy], + preserve_recent_messages=5, + preserve_initial_messages=2 +) + +agent = Agent(conversation_manager=conversation_manager) +``` + +## Advanced Configuration + +### Fine-Tuning Preservation Settings + 
+Control exactly which messages are preserved during pruning: + +```python +conversation_manager = PruningConversationManager( + pruning_strategies=[LargeToolResultPruningStrategy()], + preserve_initial_messages=3, # Keep first 3 messages (system prompt, initial exchange) + preserve_recent_messages=5, # Keep last 5 messages (recent context) + enable_proactive_pruning=True, + pruning_threshold=0.6, # More aggressive - prune at 60% capacity + context_window_size=150_000 +) +``` + +### Reactive vs Proactive Pruning + +```python +# Reactive only - prune only when context window is exceeded +reactive_manager = PruningConversationManager( + pruning_strategies=[LargeToolResultPruningStrategy()], + enable_proactive_pruning=False # Disable proactive pruning +) + +# Proactive - prune before hitting limits +proactive_manager = PruningConversationManager( + pruning_strategies=[LargeToolResultPruningStrategy()], + enable_proactive_pruning=True, + pruning_threshold=0.7, # Prune when 70% full + context_window_size=200_000 +) +``` + +## Custom Pruning Strategies + +Create your own pruning strategies by implementing the `PruningStrategy` interface: + +```python +from strands.agent.conversation_manager.pruning_conversation_manager import PruningStrategy +from strands.types.content import Message +from typing import Optional + +class TokenBasedPruningStrategy(PruningStrategy): + """Prune messages based on token count.""" + + def __init__(self, max_message_tokens: int = 1000): + self.max_message_tokens = max_message_tokens + + def should_prune_message(self, message: Message, context) -> bool: + """Prune messages that exceed the token limit.""" + return context["token_count"] > self.max_message_tokens + + def prune_message(self, message: Message, agent) -> Optional[Message]: + """Truncate the message content.""" + pruned_message = message.copy() + + for content in pruned_message.get("content", []): + if "text" in content: + text = content["text"] + if len(text) > 500: # Truncate long 
text + content["text"] = text[:500] + "... [truncated]" + + return pruned_message + + def get_strategy_name(self) -> str: + return "TokenBasedPruningStrategy" + +# Use the custom strategy +custom_strategy = TokenBasedPruningStrategy(max_message_tokens=2000) +conversation_manager = PruningConversationManager( + pruning_strategies=[custom_strategy] +) +``` + +### Content-Aware Pruning Strategy + +Create strategies that understand message content: + +```python +class DebugMessagePruningStrategy(PruningStrategy): + """Remove debug and logging messages to save context space.""" + + def should_prune_message(self, message: Message, context) -> bool: + """Identify debug messages by content patterns.""" + for content in message.get("content", []): + if "text" in content: + text = content["text"].lower() + # Look for debug patterns + debug_patterns = ["debug:", "log:", "trace:", "verbose:"] + if any(pattern in text for pattern in debug_patterns): + return True + return False + + def prune_message(self, message: Message, agent) -> Optional[Message]: + """Remove debug messages entirely.""" + return None # Remove the message completely + + def get_strategy_name(self) -> str: + return "DebugMessagePruningStrategy" +``` + +## Tool Result Compression + +The `LargeToolResultPruningStrategy` provides sophisticated compression for tool results: + +```python +# Detailed configuration for tool result compression +tool_strategy = LargeToolResultPruningStrategy( + max_tool_result_tokens=25_000, # Compress results larger than 25k tokens + compression_template=( + "[COMPRESSED] Original: {original_size} tokens → Compressed: {compressed_size} tokens\n" + "Status: {status}\n" + "--- Compressed Content Below ---" + ), + enable_llm_compression=False # Use simple compression (LLM compression not yet implemented) +) + +conversation_manager = PruningConversationManager( + pruning_strategies=[tool_strategy], + preserve_recent_messages=4, + preserve_initial_messages=2 +) +``` + +### Understanding 
Tool Result Compression + +The strategy compresses tool results by: + +1. **Text Truncation**: Long text content is truncated with indicators +2. **JSON Summarization**: Large JSON objects are replaced with metadata and samples +3. **Metadata Preservation**: Tool status and IDs are always preserved +4. **Compression Notes**: Clear indicators show what was compressed + +Example of compressed output: +``` +[Tool result compressed: 15000 tokens → 500 tokens. Status: success] +{ + "_compressed": true, + "_n_original_keys": 150, + "_size": 15000, + "_type": "dict", + "sample_key_1": "sample_value_1", + "sample_key_2": "sample_value_2" +} +``` + +## Monitoring and Debugging + +### Tracking Pruning Activity + +```python +# Access pruning statistics +print(f"Messages removed: {conversation_manager.removed_message_count}") + +# Get current state for debugging +state = conversation_manager.get_state() +print(f"Manager state: {state}") +``` + +### Logging Pruning Decisions + +Enable logging to see pruning decisions: + +```python +import logging + +# Enable debug logging for pruning +logging.getLogger("strands.agent.conversation_manager.pruning_conversation_manager").setLevel(logging.DEBUG) +logging.getLogger("strands.agent.conversation_manager.strategies.tool_result_pruning").setLevel(logging.DEBUG) + +# Now you'll see detailed logs about pruning decisions +``` + +## Best Practices + +### 1. Choose Appropriate Thresholds + +```python +# For long-running conversations with large tool results +conversation_manager = PruningConversationManager( + pruning_strategies=[LargeToolResultPruningStrategy(max_tool_result_tokens=10_000)], + pruning_threshold=0.6, # Prune early to avoid context overflow + preserve_recent_messages=5, # Keep enough recent context + preserve_initial_messages=2 # Preserve system setup +) +``` + +### 2. 
Preserve Critical Messages + +```python +# For conversations where initial setup is crucial +conversation_manager = PruningConversationManager( + pruning_strategies=[LargeToolResultPruningStrategy()], + preserve_initial_messages=5, # Keep more initial context + preserve_recent_messages=3, # Standard recent context + pruning_threshold=0.8 # Less aggressive pruning +) +``` + +### 3. Combine with Other Managers + +You can switch between different conversation managers based on use case: + +```python +from strands.agent.conversation_manager import SummarizingConversationManager + +# Use pruning for tool-heavy conversations +pruning_manager = PruningConversationManager( + pruning_strategies=[LargeToolResultPruningStrategy()] +) + +# Use summarizing for text-heavy conversations +summarizing_manager = SummarizingConversationManager() + +# Switch based on conversation characteristics +if has_many_tool_results: + agent.conversation_manager = pruning_manager +else: + agent.conversation_manager = summarizing_manager +``` + +## Common Use Cases + +### 1. API Integration Agents + +For agents that make many API calls with large responses: + +```python +api_strategy = LargeToolResultPruningStrategy( + max_tool_result_tokens=5_000, # API responses can be large + compression_template="[API Response compressed: {original_size} → {compressed_size} tokens]" +) + +conversation_manager = PruningConversationManager( + pruning_strategies=[api_strategy], + preserve_recent_messages=4, # Keep recent API context + pruning_threshold=0.7 +) +``` + +### 2. 
Data Analysis Agents + +For agents processing large datasets: + +```python +data_strategy = LargeToolResultPruningStrategy( + max_tool_result_tokens=15_000, # Data outputs can be very large +) + +conversation_manager = PruningConversationManager( + pruning_strategies=[data_strategy], + preserve_initial_messages=3, # Keep data setup context + preserve_recent_messages=5, # Keep recent analysis + pruning_threshold=0.6 # Aggressive pruning for large data +) +``` + +### 3. Code Generation Agents + +For agents that generate and execute code: + +```python +# Custom strategy for code execution results +class CodeExecutionPruningStrategy(PruningStrategy): + def should_prune_message(self, message, context): + # Prune large code execution outputs + if context["has_tool_result"]: + for content in message.get("content", []): + if "toolResult" in content: + # Check if it's a code execution result + tool_result = content["toolResult"] + if "code_execution" in str(tool_result).lower(): + return context["token_count"] > 2000 + return False + + def prune_message(self, message, agent): + # Compress code execution results + pruned_message = message.copy() + for content in pruned_message.get("content", []): + if "toolResult" in content: + result = content["toolResult"] + if result.get("content"): + # Keep only first and last few lines of output + for result_content in result["content"]: + if "text" in result_content: + lines = result_content["text"].split('\n') + if len(lines) > 20: + compressed = ( + '\n'.join(lines[:5]) + + f'\n... 
[{len(lines)-10} lines omitted] ...\n' + + '\n'.join(lines[-5:]) + ) + result_content["text"] = compressed + return pruned_message + + def get_strategy_name(self): + return "CodeExecutionPruningStrategy" + +code_strategy = CodeExecutionPruningStrategy() +conversation_manager = PruningConversationManager( + pruning_strategies=[code_strategy], + preserve_recent_messages=3 +) +``` + +This comprehensive guide shows how to effectively use conversation pruning to manage context while preserving important information and conversation structure. \ No newline at end of file diff --git a/docs/examples/python/conversation_pruning.py b/docs/examples/python/conversation_pruning.py new file mode 100644 index 00000000..7b371191 --- /dev/null +++ b/docs/examples/python/conversation_pruning.py @@ -0,0 +1,339 @@ +""" +Conversation Pruning Example + +This example demonstrates how to use the PruningConversationManager to intelligently +manage conversation history by selectively pruning messages while preserving structure. +""" + +from strands import Agent +from strands.agent.conversation_manager import PruningConversationManager +from strands.agent.conversation_manager.strategies import LargeToolResultPruningStrategy +from strands.agent.conversation_manager.pruning_conversation_manager import PruningStrategy +from strands.types.content import Message +from strands.models import AnthropicModel +from typing import Optional +import json + + +def basic_pruning_example(): + """Demonstrate basic conversation pruning with tool result compression.""" + print("=== Basic Pruning Example ===") + + # Create a strategy to compress large tool results + tool_result_strategy = LargeToolResultPruningStrategy( + max_tool_result_tokens=1000, # Compress results larger than 1k tokens + compression_template="[Compressed: {original_size} → {compressed_size} tokens. 
Status: {status}]" + ) + + # Create the pruning conversation manager + conversation_manager = PruningConversationManager( + pruning_strategies=[tool_result_strategy], + preserve_recent_messages=2, # Keep 2 most recent messages + preserve_initial_messages=1, # Keep 1 initial message + enable_proactive_pruning=True, # Enable automatic pruning + pruning_threshold=0.7, # Prune when 70% of context is used + context_window_size=10_000 # Small context for demo + ) + + # Create agent with pruning manager + model = AnthropicModel(model_id="claude-3-5-sonnet-20241022") + agent = Agent( + model=model, + conversation_manager=conversation_manager + ) + + print(f"Created agent with pruning manager") + print(f"Preserve recent: {conversation_manager.preserve_recent_messages}") + print(f"Preserve initial: {conversation_manager.preserve_initial_messages}") + print(f"Proactive pruning: {conversation_manager.enable_proactive_pruning}") + print(f"Pruning threshold: {conversation_manager.pruning_threshold}") + + return agent + + +def custom_pruning_strategy_example(): + """Demonstrate creating and using custom pruning strategies.""" + print("\n=== Custom Pruning Strategy Example ===") + + class DebugMessagePruningStrategy(PruningStrategy): + """Custom strategy to remove debug messages.""" + + def should_prune_message(self, message: Message, context) -> bool: + """Identify debug messages by content patterns.""" + for content in message.get("content", []): + if "text" in content: + text = content["text"].lower() + # Look for debug patterns + debug_patterns = ["debug:", "log:", "trace:", "[debug]"] + if any(pattern in text for pattern in debug_patterns): + return True + return False + + def prune_message(self, message: Message, agent) -> Optional[Message]: + """Remove debug messages entirely.""" + return None # Remove the message completely + + def get_strategy_name(self) -> str: + return "DebugMessagePruningStrategy" + + class LongMessagePruningStrategy(PruningStrategy): + """Custom 
strategy to truncate very long messages.""" + + def __init__(self, max_length: int = 500): + self.max_length = max_length + + def should_prune_message(self, message: Message, context) -> bool: + """Prune messages that are too long.""" + return context.get("token_count", 0) > 200 # Rough token estimate + + def prune_message(self, message: Message, agent) -> Optional[Message]: + """Truncate long messages.""" + pruned_message = message.copy() + + for content in pruned_message.get("content", []): + if "text" in content: + text = content["text"] + if len(text) > self.max_length: + content["text"] = text[:self.max_length] + "... [truncated]" + + return pruned_message + + def get_strategy_name(self) -> str: + return "LongMessagePruningStrategy" + + # Create custom strategies + debug_strategy = DebugMessagePruningStrategy() + long_message_strategy = LongMessagePruningStrategy(max_length=300) + tool_result_strategy = LargeToolResultPruningStrategy(max_tool_result_tokens=500) + + # Combine multiple strategies + conversation_manager = PruningConversationManager( + pruning_strategies=[debug_strategy, long_message_strategy, tool_result_strategy], + preserve_recent_messages=3, + preserve_initial_messages=1, + enable_proactive_pruning=True, + pruning_threshold=0.6 + ) + + model = AnthropicModel(model_id="claude-3-5-sonnet-20241022") + agent = Agent( + model=model, + conversation_manager=conversation_manager + ) + + print(f"Created agent with {len(conversation_manager.pruning_strategies)} custom strategies:") + for strategy in conversation_manager.pruning_strategies: + print(f" - {strategy.get_strategy_name()}") + + return agent + + +def simulate_conversation_with_pruning(agent: Agent): + """Simulate a conversation that will trigger pruning.""" + print("\n=== Simulating Conversation with Pruning ===") + + # Simulate adding messages that would trigger pruning + # Note: In a real scenario, these would come from actual agent interactions + + # Add initial message + initial_message = 
{ + "role": "user", + "content": [{"text": "Hello, I need help with data analysis."}] + } + agent.messages.append(initial_message) + + # Add some regular messages + messages_to_add = [ + {"role": "assistant", "content": [{"text": "I'd be happy to help with data analysis!"}]}, + {"role": "user", "content": [{"text": "Debug: Starting analysis process..."}]}, # Should be pruned + {"role": "assistant", "content": [{"text": "Let me analyze your data step by step."}]}, + {"role": "user", "content": [{"text": "This is a very long message that contains a lot of detailed information about the data analysis requirements and specifications that might need to be truncated to save context space in the conversation history management system."}]}, # Should be truncated + {"role": "assistant", "content": [{"text": "I understand your requirements."}]}, + {"role": "user", "content": [{"text": "Log: Processing data batch 1 of 100..."}]}, # Should be pruned + {"role": "assistant", "content": [{"text": "Recent response 1"}]}, # Should be preserved + {"role": "user", "content": [{"text": "Recent message 2"}]}, # Should be preserved + ] + + for msg in messages_to_add: + agent.messages.append(msg) + + print(f"Added {len(messages_to_add) + 1} messages to conversation") + print(f"Total messages before pruning: {len(agent.messages)}") + + # Manually trigger pruning (normally this happens automatically) + original_count = len(agent.messages) + try: + agent.conversation_manager.reduce_context(agent) + print(f"Pruning completed successfully") + print(f"Messages after pruning: {len(agent.messages)}") + print(f"Messages removed: {agent.conversation_manager.removed_message_count}") + + # Show remaining messages + print("\nRemaining messages:") + for i, msg in enumerate(agent.messages): + role = msg["role"] + content_preview = str(msg["content"])[:100] + "..." if len(str(msg["content"])) > 100 else str(msg["content"]) + print(f" {i+1}. 
[{role}] {content_preview}") + + except Exception as e: + print(f"Pruning failed: {e}") + + +def demonstrate_proactive_pruning(): + """Demonstrate proactive pruning based on token thresholds.""" + print("\n=== Proactive Pruning Demonstration ===") + + # Create a manager with very low threshold for demo + conversation_manager = PruningConversationManager( + pruning_strategies=[LargeToolResultPruningStrategy(max_tool_result_tokens=100)], + preserve_recent_messages=2, + preserve_initial_messages=1, + enable_proactive_pruning=True, + pruning_threshold=0.3, # Very low threshold for demo + context_window_size=1000 # Very small context window for demo + ) + + model = AnthropicModel(model_id="claude-3-5-sonnet-20241022") + agent = Agent( + model=model, + conversation_manager=conversation_manager + ) + + # Add messages that will exceed the threshold + large_messages = [ + {"role": "user", "content": [{"text": "Initial message for context."}]}, + {"role": "assistant", "content": [{"text": "This is a large message with lots of content that will help demonstrate the proactive pruning functionality when the conversation grows beyond the configured threshold limits."}]}, + {"role": "user", "content": [{"text": "Another large message with substantial content that adds to the token count and helps trigger the proactive pruning mechanism built into the conversation manager."}]}, + {"role": "assistant", "content": [{"text": "Yet another substantial response that contributes to the growing conversation history and token usage."}]}, + {"role": "user", "content": [{"text": "Recent message 1"}]}, # Should be preserved + {"role": "assistant", "content": [{"text": "Recent message 2"}]}, # Should be preserved + ] + + for msg in large_messages: + agent.messages.append(msg) + + print(f"Added {len(large_messages)} messages") + print(f"Checking if proactive pruning should trigger...") + + # Check if proactive pruning would be triggered + should_prune = 
agent.conversation_manager._should_prune_proactively(agent) + print(f"Should prune proactively: {should_prune}") + + if should_prune: + print("Triggering proactive pruning...") + agent.conversation_manager.apply_management(agent) + print(f"Messages after proactive pruning: {len(agent.messages)}") + else: + print("Proactive pruning threshold not reached") + + +def demonstrate_tool_result_compression(): + """Demonstrate tool result compression functionality.""" + print("\n=== Tool Result Compression Demonstration ===") + + # Create a strategy specifically for tool result compression + tool_strategy = LargeToolResultPruningStrategy( + max_tool_result_tokens=200, # Low threshold for demo + compression_template="[COMPRESSED TOOL RESULT] Original: {original_size} tokens → Compressed: {compressed_size} tokens | Status: {status}" + ) + + conversation_manager = PruningConversationManager( + pruning_strategies=[tool_strategy], + preserve_recent_messages=2, + preserve_initial_messages=1 + ) + + model = AnthropicModel(model_id="claude-3-5-sonnet-20241022") + agent = Agent( + model=model, + conversation_manager=conversation_manager + ) + + # Create a message with a large tool result + large_tool_result = { + "role": "user", + "content": [{ + "toolResult": { + "toolUseId": "test-123", + "status": "success", + "content": [{ + "text": "This is a very large tool result that contains extensive data analysis output with detailed statistics, multiple data points, comprehensive analysis results, and verbose logging information that would normally consume significant context space in the conversation history." 
* 5 + }, { + "json": { + "data": [{"id": i, "value": f"item_{i}", "details": f"detailed_info_{i}"} for i in range(50)], + "metadata": {"total_items": 50, "processing_time": "2.5s", "status": "completed"}, + "summary": {"key_findings": ["finding_1", "finding_2", "finding_3"], "recommendations": ["rec_1", "rec_2"]} + } + }] + } + }] + } + + # Add messages including the large tool result + messages = [ + {"role": "user", "content": [{"text": "Please analyze this data."}]}, + {"role": "assistant", "content": [{"toolUse": {"toolUseId": "test-123", "name": "analyze_data", "input": {}}}]}, + large_tool_result, # This should be compressed + {"role": "assistant", "content": [{"text": "Recent response"}]}, + {"role": "user", "content": [{"text": "Recent user message"}]} + ] + + for msg in messages: + agent.messages.append(msg) + + print(f"Added {len(messages)} messages including large tool result") + print("Original tool result size:", len(str(large_tool_result))) + + # Trigger pruning to compress the tool result + try: + agent.conversation_manager.reduce_context(agent) + print("Tool result compression completed") + + # Find and display the compressed tool result + for i, msg in enumerate(agent.messages): + for content in msg.get("content", []): + if "toolResult" in content: + print(f"\nCompressed tool result in message {i+1}:") + tool_result = content["toolResult"] + for result_content in tool_result.get("content", []): + if "text" in result_content: + print(f" Text: {result_content['text'][:200]}...") + elif "json" in result_content: + print(f" JSON: {str(result_content['json'])[:200]}...") + + except Exception as e: + print(f"Tool result compression failed: {e}") + + +def main(): + """Run all pruning examples.""" + print("Conversation Pruning Examples") + print("=" * 50) + + try: + # Basic pruning example + agent1 = basic_pruning_example() + + # Custom strategies example + agent2 = custom_pruning_strategy_example() + + # Simulate conversation with pruning + 
simulate_conversation_with_pruning(agent2) + + # Demonstrate proactive pruning + demonstrate_proactive_pruning() + + # Demonstrate tool result compression + demonstrate_tool_result_compression() + + print("\n" + "=" * 50) + print("All pruning examples completed successfully!") + + except Exception as e: + print(f"Error running examples: {e}") + import traceback + traceback.print_exc() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/docs/user-guide/concepts/agents/conversation-management.md b/docs/user-guide/concepts/agents/conversation-management.md index 701e20d0..fbbb6141 100644 --- a/docs/user-guide/concepts/agents/conversation-management.md +++ b/docs/user-guide/concepts/agents/conversation-management.md @@ -173,3 +173,53 @@ Key features of the `SummarizingConversationManager`: - **Tool Pair Preservation**: Ensures tool use and result message pairs aren't broken during summarization - **Flexible Configuration**: Customize summarization behavior through various parameters - **Fallback Safety**: Handles summarization failures gracefully + +#### PruningConversationManager + +The `PruningConversationManager` implements selective message pruning using configurable strategies. Unlike summarization which collapses multiple messages into one, pruning returns a list of messages where some have been compressed, removed, or truncated while others remain intact, preserving conversation structure and flow. 
+ +**Key Features:** + +- **Strategy-Based Pruning**: Uses pluggable strategies to determine which messages to prune and how +- **Selective Preservation**: Preserves initial and recent messages while pruning middle content +- **Proactive Management**: Can automatically prune when conversation approaches token limits +- **Tool Result Compression**: Built-in strategy for compressing large tool outputs +- **Context-Aware Decisions**: Uses rich context information for intelligent pruning + +**Basic Usage:** + +```python +from strands import Agent +from strands.agent.conversation_manager import PruningConversationManager +from strands.agent.conversation_manager.strategies import LargeToolResultPruningStrategy + +# Create a pruning strategy for large tool results +tool_result_strategy = LargeToolResultPruningStrategy( + max_tool_result_tokens=50_000 # Compress tool results larger than 50k tokens +) + +# Create the pruning conversation manager +conversation_manager = PruningConversationManager( + pruning_strategies=[tool_result_strategy], + preserve_recent_messages=3, # Always keep 3 most recent messages + preserve_initial_messages=2, # Always keep 2 initial messages + enable_proactive_pruning=True, # Enable automatic pruning + pruning_threshold=0.7, # Prune when 70% of context window is used + context_window_size=200_000 # Context window size in tokens +) + +agent = Agent( + conversation_manager=conversation_manager +) +``` + +**Configuration Parameters:** + +- **`pruning_strategies`** (List[PruningStrategy]): List of strategies to apply for message pruning +- **`preserve_initial_messages`** (int, default: 1): Number of initial messages to never prune +- **`preserve_recent_messages`** (int, default: 2): Number of recent messages to never prune +- **`enable_proactive_pruning`** (bool, default: True): Whether to prune proactively based on threshold +- **`pruning_threshold`** (float, default: 0.7): Context usage threshold to trigger proactive pruning (0.1-1.0) +- 
**`context_window_size`** (int, default: 200000): Maximum context window size in tokens + +For more details on creating custom pruning strategies and advanced usage, see the [Conversation Pruning Guide](./conversation-pruning.md) and [Pruning Examples](../../../examples/python/conversation_pruning.md). diff --git a/docs/user-guide/concepts/agents/conversation-pruning.md b/docs/user-guide/concepts/agents/conversation-pruning.md new file mode 100644 index 00000000..0f528899 --- /dev/null +++ b/docs/user-guide/concepts/agents/conversation-pruning.md @@ -0,0 +1,391 @@ +# Conversation Pruning + +Conversation pruning is an intelligent approach to managing conversation history that selectively compresses, truncates, or removes messages while preserving the overall conversation structure and flow. Unlike summarization which collapses multiple messages into a single summary, pruning maintains the message-by-message structure while reducing the total context size. + +## Core Concepts + +### What is Pruning? + +Pruning operates on individual messages within a conversation, applying different strategies to reduce their size or remove them entirely. The key characteristics of pruning are: + +- **Selective Processing**: Only certain messages are modified based on configurable criteria +- **Structure Preservation**: The conversation maintains its original message sequence and roles +- **Strategy-Based**: Uses pluggable strategies to determine what and how to prune +- **Context-Aware**: Makes decisions based on message content, position, and metadata + +### Pruning vs. 
Other Approaches + +| Approach | Structure | Content | Use Case | +|----------|-----------|---------|----------| +| **Sliding Window** | Removes oldest messages | Preserves recent messages exactly | Simple, predictable context management | +| **Summarization** | Collapses multiple messages into summaries | Condenses information | Preserving historical context in condensed form | +| **Pruning** | Maintains message structure | Selectively modifies individual messages | Fine-grained control over specific content types | + +### When to Use Pruning + +Pruning is particularly effective when: + +- **Tool Results are Large**: API responses, data analysis outputs, or file contents consume significant context +- **Selective Preservation is Needed**: Some messages are more important than others +- **Structure Matters**: The conversation flow and message sequence must be maintained +- **Content-Specific Management**: Different types of content need different handling strategies + +## Architecture + +### PruningConversationManager + +The `PruningConversationManager` is the central component that orchestrates the pruning process: + +```python +from strands.agent.conversation_manager import PruningConversationManager + +manager = PruningConversationManager( + pruning_strategies=[...], # List of strategies to apply + preserve_initial_messages=1, # Messages to never prune from start + preserve_recent_messages=2, # Messages to never prune from end + enable_proactive_pruning=True, # Prune before hitting limits + pruning_threshold=0.7, # When to trigger proactive pruning + context_window_size=200_000 # Total context window size +) +``` + +### Pruning Strategies + +Strategies implement the `PruningStrategy` interface and define: + +1. **`should_prune_message()`**: Determines if a message should be pruned +2. **`prune_message()`**: Performs the actual pruning operation +3. 
**`get_strategy_name()`**: Returns a human-readable strategy name + +```python +from strands.agent.conversation_manager.pruning_conversation_manager import PruningStrategy + +class CustomPruningStrategy(PruningStrategy): + def should_prune_message(self, message, context): + # Decision logic here + return True # or False + + def prune_message(self, message, agent): + # Pruning logic here + return modified_message # or None to remove + + def get_strategy_name(self): + return "CustomPruningStrategy" +``` + +### Message Context + +The `MessageContext` provides rich information for pruning decisions: + +```python +{ + "token_count": 1500, # Estimated tokens in message + "has_tool_use": False, # Contains tool use content + "has_tool_result": True, # Contains tool result content + "message_index": 5, # Position in conversation + "total_messages": 10 # Total messages in conversation +} +``` + +## Pruning Process + +### 1. Context Analysis + +The `PruningContext` analyzes the entire conversation to provide: + +- Token count estimates for each message +- Content type detection (tool use, tool results, text) +- Message positioning and relationships +- Overall conversation statistics + +### 2. Message Evaluation + +For each message in the prunable range (excluding preserved initial and recent messages): + +1. **Strategy Consultation**: Each strategy evaluates if the message should be pruned +2. **Pruning Decision**: If any strategy indicates pruning, the message is processed +3. **Pruning Execution**: The first matching strategy performs the pruning operation + +### 3. 
Preservation Rules + +Messages are automatically preserved based on position: + +- **Initial Messages**: First N messages (system prompts, conversation setup) +- **Recent Messages**: Last N messages (current context, recent exchanges) +- **Prunable Range**: Middle messages that can be modified or removed + +``` +[Initial Messages] [Prunable Messages] [Recent Messages] + Preserved Can be Pruned Preserved +``` + +### 4. Validation and Application + +After pruning: + +1. **Effectiveness Check**: Validates that pruning actually reduced context size +2. **Message Update**: Applies the pruned messages to the agent's conversation +3. **Statistics Update**: Tracks removed message counts for session management + +## Built-in Strategies + +### LargeToolResultPruningStrategy + +The `LargeToolResultPruningStrategy` compresses large tool results while preserving essential information: + +**Features:** +- **Token-based Thresholds**: Configurable size limits for tool results +- **Content-aware Compression**: Different handling for text, JSON, and binary content +- **Metadata Preservation**: Maintains tool IDs, status, and execution context +- **Compression Indicators**: Clear markers showing what was compressed + +**Configuration:** +```python +from strands.agent.conversation_manager.strategies import LargeToolResultPruningStrategy + +strategy = LargeToolResultPruningStrategy( + max_tool_result_tokens=50_000, # Size threshold + compression_template="[Compressed: {original_size} → {compressed_size} tokens]", + enable_llm_compression=False # Future feature +) +``` + +**Compression Techniques:** + +1. **Text Truncation**: Long text content is truncated with clear indicators +2. **JSON Summarization**: Large JSON objects become metadata with samples +3. 
**Binary Handling**: Document and image content size estimation + +Example compressed output: +```json +{ + "toolUseId": "abc123", + "status": "success", + "content": [ + { + "text": "[Compressed: 15000 → 500 tokens]" + }, + { + "json": { + "_compressed": true, + "_n_original_keys": 150, + "_size": 15000, + "_type": "dict", + "sample_key": "sample_value" + } + } + ] +} +``` + +## Proactive vs. Reactive Pruning + +### Reactive Pruning + +Triggered when the context window is exceeded: + +- **Event-Driven**: Responds to context overflow exceptions +- **Emergency Response**: Applied when limits are already reached +- **Aggressive**: May need to remove more content to fit within limits + +### Proactive Pruning + +Triggered before reaching context limits: + +- **Threshold-Based**: Activates when context usage exceeds configured percentage +- **Preventive**: Avoids context overflow situations +- **Gradual**: Can apply lighter pruning since limits aren't yet reached + +```python +# Configure proactive pruning +manager = PruningConversationManager( + pruning_strategies=[...], + enable_proactive_pruning=True, + pruning_threshold=0.7, # Prune at 70% capacity + context_window_size=200_000 # Total context size +) +``` + +## Custom Strategy Development + +### Strategy Interface + +Implement the three required methods: + +```python +from strands.agent.conversation_manager.pruning_conversation_manager import PruningStrategy +from strands.types.content import Message +from typing import Optional + +class MyCustomStrategy(PruningStrategy): + def should_prune_message(self, message: Message, context) -> bool: + """ + Determine if this message should be pruned. + + Args: + message: The message to evaluate + context: MessageContext with metadata + + Returns: + True if the message should be pruned + """ + # Your decision logic here + return context["token_count"] > 1000 + + def prune_message(self, message: Message, agent) -> Optional[Message]: + """ + Perform the pruning operation. 
+
+        Args:
+            message: The message to prune
+            agent: Agent instance for context
+
+        Returns:
+            Modified message, or None to remove entirely
+        """
+        # Your pruning logic here
+        return modified_message
+
+    def get_strategy_name(self) -> str:
+        """Return a descriptive name for this strategy."""
+        return "MyCustomStrategy"
+```
+
+### Common Patterns
+
+**Content-Based Pruning:**
+```python
+def should_prune_message(self, message, context):
+    for content in message.get("content", []):
+        if "text" in content:
+            text = content["text"].lower()
+            if "debug" in text or "verbose" in text:
+                return True
+    return False
+```
+
+**Position-Based Pruning:**
+```python
+def should_prune_message(self, message, context):
+    # Prune messages in the middle third of conversation
+    total = context["total_messages"]
+    index = context["message_index"]
+    return total // 3 <= index <= 2 * total // 3
+```
+
+**Size-Based Pruning:**
+```python
+def should_prune_message(self, message, context):
+    return context["token_count"] > self.max_tokens
+```
+
+**Role-Based Pruning:**
+```python
+def should_prune_message(self, message, context):
+    # Only prune assistant messages, preserve user messages
+    return message.get("role") == "assistant"
+```
+
+### Pruning Operations
+
+**Message Removal:**
+```python
+def prune_message(self, message, agent):
+    return None  # Remove message entirely
+```
+
+**Content Truncation:**
+```python
+def prune_message(self, message, agent):
+    pruned = {**message, "content": [dict(c) for c in message.get("content", [])]}  # copy content blocks; a shallow message.copy() would mutate the caller's original message
+    for content in pruned.get("content", []):
+        if "text" in content and len(content["text"]) > 500:
+            content["text"] = content["text"][:500] + "... [truncated]"
+    return pruned
+```
+
+**Content Replacement:**
+```python
+def prune_message(self, message, agent):
+    return {
+        "role": message["role"],
+        "content": [{"text": "[Message compressed due to size]"}]
+    }
+```
+
+## Best Practices
+
+### Strategy Selection
+
+1. **Start Simple**: Begin with built-in strategies like `LargeToolResultPruningStrategy`
+2. 
**Combine Strategies**: Use multiple strategies for comprehensive pruning +3. **Test Thoroughly**: Validate that pruning preserves essential information +4. **Monitor Impact**: Track pruning effectiveness and conversation quality + +### Configuration Guidelines + +**Preservation Settings:** +- **Initial Messages**: Include system prompts and conversation setup (1-3 messages) +- **Recent Messages**: Maintain current context and recent exchanges (2-5 messages) +- **Balance**: Ensure enough messages remain for coherent conversation + +**Threshold Settings:** +- **Conservative**: 0.8-0.9 for critical applications +- **Balanced**: 0.6-0.7 for general use +- **Aggressive**: 0.4-0.5 for resource-constrained environments + +**Strategy Parameters:** +- **Tool Results**: Start with 10k-50k token limits +- **Text Content**: Consider 1k-5k character limits +- **Custom Logic**: Align with your specific use case requirements + +### Error Handling + +```python +try: + conversation_manager.reduce_context(agent) +except ContextWindowOverflowException: + # Fallback to more aggressive pruning + fallback_manager = PruningConversationManager( + pruning_strategies=[...], + preserve_recent_messages=1, # More aggressive + pruning_threshold=0.5 + ) + fallback_manager.reduce_context(agent) +``` + +### Monitoring and Debugging + +Enable logging to understand pruning decisions: + +```python +import logging + +# Enable pruning logs +logging.getLogger("strands.agent.conversation_manager.pruning_conversation_manager").setLevel(logging.DEBUG) +logging.getLogger("strands.agent.conversation_manager.strategies").setLevel(logging.DEBUG) + +# Monitor pruning statistics +print(f"Messages removed: {manager.removed_message_count}") +print(f"Manager state: {manager.get_state()}") +``` + +## Integration with Session Management + +Pruning integrates seamlessly with [Session Management](./session-management.md): + +- **Removed Message Tracking**: The `removed_message_count` helps session loading +- **State 
Persistence**: Pruning statistics are saved and restored with sessions +- **Efficient Loading**: Sessions can skip loading messages that were pruned + +```python +# Pruning state is automatically managed +state = agent.get_session_state() +# ... save state ... + +# Restore with pruning state +new_agent = Agent(conversation_manager=PruningConversationManager(...)) +new_agent.restore_session_state(state) +``` + +This comprehensive approach to conversation pruning provides fine-grained control over context management while maintaining conversation coherence and structure. \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 311de0d4..d91864ae 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -81,6 +81,7 @@ nav: - Hooks: user-guide/concepts/agents/hooks.md - Structured Output: user-guide/concepts/agents/structured-output.md - Conversation Management: user-guide/concepts/agents/conversation-management.md + - Conversation Pruning: user-guide/concepts/agents/conversation-pruning.md - Tools: - Overview: user-guide/concepts/tools/tools_overview.md - Python: user-guide/concepts/tools/python-tools.md @@ -138,6 +139,7 @@ nav: - Meta Tooling: examples/python/meta_tooling.md - MCP: examples/python/mcp_calculator.md - Multi-modal: examples/python/multimodal.md + - Conversation Pruning: examples/python/conversation_pruning.md - Contribute ❤️: https://github.com/strands-agents/sdk-python/blob/main/CONTRIBUTING.md - API Reference: - Agent: api-reference/agent.md