From e361d8333c9c56898e0b5fd0a7dc6620c23a1e56 Mon Sep 17 00:00:00 2001 From: aedpooji Date: Wed, 5 Nov 2025 19:02:49 -0500 Subject: [PATCH 1/4] Typos/issues fixed --- docs/examples/README.md | 2 +- docs/examples/cdk/deploy_to_ec2/package.json | 2 +- .../cdk/deploy_to_fargate/package.json | 2 +- docs/examples/python/agents_workflows.md | 36 +++++++++---------- docs/examples/python/mcp_calculator.md | 2 +- docs/examples/python/memory_agent.md | 2 +- docs/examples/python/meta_tooling.md | 2 +- docs/examples/python/meta_tooling.py | 2 +- docs/examples/python/weather_forecaster.md | 2 +- docs/user-guide/concepts/agents/prompts.md | 2 +- .../concepts/experimental/agent-config.md | 4 +-- .../concepts/multi-agent/agent-to-agent.md | 2 +- docs/user-guide/concepts/multi-agent/graph.md | 2 +- docs/user-guide/concepts/tools/executors.md | 2 +- .../deploy/operating-agents-in-production.md | 8 ++--- .../observability-evaluation/logs.md | 4 +-- .../observability-evaluation/observability.md | 2 +- .../safety-security/pii-redaction.md | 2 +- 18 files changed, 40 insertions(+), 40 deletions(-) diff --git a/docs/examples/README.md b/docs/examples/README.md index 75d0a589..3da8a6f2 100644 --- a/docs/examples/README.md +++ b/docs/examples/README.md @@ -41,7 +41,7 @@ Available Python examples: - [CLI Reference Agent](python/cli-reference-agent.md) - Example of Command-line reference agent implementation - [File Operations](python/file_operations.md) - Example of agent with file manipulation capabilities - [MCP Calculator](python/mcp_calculator.md) - Example of agent with Model Context Protocol capabilities -- [Meta Tooling](python/meta_tooling.md) - Example of Agent with Meta tooling capabilities +- [Meta Tooling](python/meta_tooling.md) - Example of agent with Meta tooling capabilities - [Multi-Agent Example](python/multi_agent_example/multi_agent_example.md) - Example of a multi-agent system - [Weather Forecaster](python/weather_forecaster.md) - Example of a weather forecasting agent with http_request capabilities diff --git a/docs/examples/cdk/deploy_to_ec2/package.json b/docs/examples/cdk/deploy_to_ec2/package.json index 8a971b77..9c6552e1 100644 --- a/docs/examples/cdk/deploy_to_ec2/package.json +++ b/docs/examples/cdk/deploy_to_ec2/package.json @@ -1,7 +1,7 @@ { "name": "deploy_to_ec2", "version": "0.1.0", - "description": "CDK TypeScript project to deploy a sample Agent to EC2", + "description": "CDK TypeScript project to deploy a sample agent to EC2", "private": true, "bin": { "cdk-app": "bin/cdk-app.js" diff --git a/docs/examples/cdk/deploy_to_fargate/package.json b/docs/examples/cdk/deploy_to_fargate/package.json index 6c58b85b..94f273d2 100644 --- a/docs/examples/cdk/deploy_to_fargate/package.json +++ b/docs/examples/cdk/deploy_to_fargate/package.json @@ -1,7 +1,7 @@ { "name": "deploy_to_lambda", "version": "0.1.0", - "description": "CDK TypeScript project to deploy a sample Agent Lambda function", + "description": "CDK TypeScript project to deploy a sample agent Lambda function", "private": true, "bin": { "cdk-app": "bin/cdk-app.js" diff --git a/docs/examples/python/agents_workflows.md b/docs/examples/python/agents_workflows.md index 51331e75..f6dbb68e 100644 --- a/docs/examples/python/agents_workflows.md +++ b/docs/examples/python/agents_workflows.md @@ -1,4 +1,4 @@ -# Agentic Workflow: Research Assistant - Multi-Agent Collaboration Example +# Agentic workflow: Research Assistant - Multi-agent Collaboration Example This 
[example](https://github.com/strands-agents/docs/blob/main/docs/examples/python/agents_workflow.py) shows how to create a multi-agent workflow using Strands agents to perform web research, fact-checking, and report generation. It demonstrates specialized agent roles working together in sequence to process information. @@ -7,10 +7,10 @@ This [example](https://github.com/strands-agents/docs/blob/main/docs/examples/py | Feature | Description | | ------------------ | -------------------------------------- | | **Tools Used** | http_request | -| **Agent Structure**| Multi-Agent Workflow (3 Agents) | +| **Agent Structure**| Multi-agent Workflow (3 Agents) | | **Complexity** | Intermediate | | **Interaction** | Command Line Interface | -| **Key Technique** | Agent-to-Agent Communication | +| **Key Technique** | agent-to-agent Communication | ## Tools Overview @@ -21,9 +21,9 @@ The `http_request` tool enables the agent to make HTTP requests to retrieve info The Research Assistant example implements a three-agent workflow where each agent has a specific role and works with other agents to complete tasks that require multiple steps of processing: -1. **Researcher Agent**: Gathers information from web sources using http_request tool -2. **Analyst Agent**: Verifies facts and identifies key insights from research findings -3. **Writer Agent**: Creates a final report based on the analysis +1. **Researcher agent**: Gathers information from web sources using http_request tool +2. **Analyst agent**: Verifies facts and identifies key insights from research findings +3. **Writer agent**: Creates a final report based on the analysis ## Code Structure and Implementation @@ -32,7 +32,7 @@ The Research Assistant example implements a three-agent workflow where each agen Each agent in the workflow is created with a system prompt that defines its role: ```python -# Researcher Agent with web capabilities +# Researcher agent with web capabilities researcher_agent = Agent( system_prompt=( "You are a Researcher Agent that gathers information from the web. " @@ -44,7 +44,7 @@ researcher_agent = Agent( tools=[http_request] ) -# Analyst Agent for verification and insight extraction +# Analyst agent for verification and insight extraction analyst_agent = Agent( callback_handler=None, system_prompt=( @@ -55,7 +55,7 @@ analyst_agent = Agent( ), ) -# Writer Agent for final report creation +# Writer agent for final report creation writer_agent = Agent( system_prompt=( "You are a Writer Agent that creates clear reports. " @@ -72,19 +72,19 @@ The workflow is orchestrated through a function that passes information between ```python def run_research_workflow(user_input): - # Step 1: Researcher Agent gathers web information + # Step 1: Researcher agent gathers web information researcher_response = researcher_agent( f"Research: '{user_input}'. Use your available tools to gather information from reliable sources.", ) research_findings = str(researcher_response) - # Step 2: Analyst Agent verifies facts + # Step 2: Analyst agent verifies facts analyst_response = analyst_agent( f"Analyze these findings about '{user_input}':\n\n{research_findings}", ) analysis = str(analyst_response) - # Step 3: Writer Agent creates report + # Step 3: Writer agent creates report final_report = writer_agent( f"Create a report on '{user_input}' based on this analysis:\n\n{analysis}" ) @@ -94,12 +94,12 @@ def run_research_workflow(user_input): ### 3. 
Output Suppression -The example suppresses intermediate outputs during the initialization of the agents, showing users only the final result from the `Writer Agent`: +The example suppresses intermediate outputs during the initialization of the agents, showing users only the final result from the `Writer agent`: ```python researcher_agent = Agent( system_prompt=( - "You are a Researcher Agent that gathers information from the web. " + "You are a Researcher agent that gathers information from the web. " "1. Determine if the input is a research query or factual claim " "2. Use your research tools (http_request, retrieve) to find relevant information " "3. Include source URLs and keep findings under 500 words" @@ -113,9 +113,9 @@ Without this suppression, the default [callback_handler](https://github.com/stra ```python print("\nProcessing: '{user_input}'") -print("\nStep 1: Researcher Agent gathering web information...") +print("\nStep 1: Researcher agent gathering web information...") print("Research complete") -print("Passing research findings to Analyst Agent...\n") +print("Passing research findings to Analyst agent...\n") ``` ## Sample Queries and Responses @@ -202,8 +202,8 @@ print("Passing research findings to Analyst Agent...\n") Here are some ways to extend this agents workflow example: 1. **Add User Feedback Loop**: Allow users to ask for more detail after receiving the report -2. **Implement Parallel Research**: Modify the Researcher Agent to gather information from multiple sources simultaneously -3. **Add Visual Content**: Enhance the Writer Agent to include images or charts in the report +2. **Implement Parallel Research**: Modify the Researcher agent to gather information from multiple sources simultaneously +3. **Add Visual Content**: Enhance the Writer agent to include images or charts in the report 4. **Create a Web Interface**: Build a web UI for the workflow 5. **Add Memory**: Implement session memory so the system remembers previous research sessions diff --git a/docs/examples/python/mcp_calculator.md b/docs/examples/python/mcp_calculator.md index 6bf087ca..09520ecd 100644 --- a/docs/examples/python/mcp_calculator.md +++ b/docs/examples/python/mcp_calculator.md @@ -36,7 +36,7 @@ def add(x: int, y: int) -> int: mcp.run(transport="streamable-http") ``` -### Now, connect the server to the Strands Agent +### Now, connect the server to the Strands agent Now let's walk through how to connect a Strands agent to our MCP server: diff --git a/docs/examples/python/memory_agent.md b/docs/examples/python/memory_agent.md index e3035aae..eacf5178 100644 --- a/docs/examples/python/memory_agent.md +++ b/docs/examples/python/memory_agent.md @@ -8,7 +8,7 @@ This [example](https://github.com/strands-agents/docs/blob/main/docs/examples/py | ------------------ | ------------------------------------------ | | **Tools Used** | mem0_memory, use_llm | | **Complexity** | Intermediate | -| **Agent Type** | Single Agent with Memory Management | +| **Agent Type** | Single agent with Memory Management | | **Interaction** | Command Line Interface | | **Key Focus** | Memory Operations & Contextual Responses | diff --git a/docs/examples/python/meta_tooling.md b/docs/examples/python/meta_tooling.md index 1fbbd3e0..4d37f01f 100644 --- a/docs/examples/python/meta_tooling.md +++ b/docs/examples/python/meta_tooling.md @@ -38,7 +38,7 @@ agent = Agent( ``` - `editor`: Tool used to write code directly to a file named `"custom_tool_X.py"`, where "X" is the index of the tool being created. 
- - `load_tool`: Tool used to load the tool so the Agent can use it. + - `load_tool`: Tool used to load the tool so the agent can use it. - `shell`: Tool used to execute the tool. #### 2. Agent System Prompt outlines a strict guideline for naming, structure, and creation of the new tools. diff --git a/docs/examples/python/meta_tooling.py b/docs/examples/python/meta_tooling.py index 1e759977..e7129ce8 100644 --- a/docs/examples/python/meta_tooling.py +++ b/docs/examples/python/meta_tooling.py @@ -5,7 +5,7 @@ This example demonstrates Strands Agents' advanced meta-tooling capabilities - the ability of an agent to create, load, and use custom tools dynamically at runtime. -It creates custom tools using the Agent's built-in tools for file operations and implicit tool calling. +It creates custom tools using the agent's built-in tools for file operations and implicit tool calling. """ import os diff --git a/docs/examples/python/weather_forecaster.md b/docs/examples/python/weather_forecaster.md index 1ead0d8e..e4813b9e 100644 --- a/docs/examples/python/weather_forecaster.md +++ b/docs/examples/python/weather_forecaster.md @@ -9,7 +9,7 @@ This [example](https://github.com/strands-agents/docs/blob/main/docs/examples/py | **Tool Used** | http_request | | **API** | National Weather Service API (no key required) | | **Complexity** | Beginner | -| **Agent Type** | Single Agent | +| **Agent Type** | Single agent | | **Interaction** | Command Line Interface | ## Tool Overview diff --git a/docs/user-guide/concepts/agents/prompts.md b/docs/user-guide/concepts/agents/prompts.md index 2eb5c60e..fba46d98 100644 --- a/docs/user-guide/concepts/agents/prompts.md +++ b/docs/user-guide/concepts/agents/prompts.md @@ -4,7 +4,7 @@ In the Strands Agents SDK, system prompts and user messages are the primary way ## System Prompts -System prompts provide high-level instructions to the model about its role, capabilities, and constraints. They set the foundation for how the model should behave throughout the conversation. You can specify the system prompt when initializing an Agent: +System prompts provide high-level instructions to the model about its role, capabilities, and constraints. They set the foundation for how the model should behave throughout the conversation. You can specify the system prompt when initializing an agent: ```python from strands import Agent diff --git a/docs/user-guide/concepts/experimental/agent-config.md b/docs/user-guide/concepts/experimental/agent-config.md index 7440fcdb..71194ff1 100644 --- a/docs/user-guide/concepts/experimental/agent-config.md +++ b/docs/user-guide/concepts/experimental/agent-config.md @@ -147,7 +147,7 @@ The `config_to_agent` function accepts: - `**kwargs`: Additional [Agent constructor parameters](../../../../api-reference/agent/#strands.agent.agent.Agent.__init__) that override config values ```python -# Override config values with valid Agent parameters +# Override config values with valid agent parameters agent = config_to_agent( "/path/to/config.json", name="Data Analyst" @@ -157,7 +157,7 @@ agent = config_to_agent( ## Best Practices 1. **Override when needed**: Use kwargs to override configuration values dynamically -2. **Leverage Agent defaults**: Only specify configuration values you want to override +2. **Leverage agent defaults**: Only specify configuration values you want to override 3. **Use standard tool formats**: Follow Agent class conventions for tool specifications 4. 
**Handle errors gracefully**: Catch FileNotFoundError and JSONDecodeError for robust applications diff --git a/docs/user-guide/concepts/multi-agent/agent-to-agent.md b/docs/user-guide/concepts/multi-agent/agent-to-agent.md index f0b0625e..c0544b09 100644 --- a/docs/user-guide/concepts/multi-agent/agent-to-agent.md +++ b/docs/user-guide/concepts/multi-agent/agent-to-agent.md @@ -69,7 +69,7 @@ a2a_server.serve() The `A2AServer` constructor accepts several configuration options: -- `agent`: The Strands Agent to wrap with A2A compatibility +- `agent`: The Strands agent to wrap with A2A compatibility - `host`: Hostname or IP address to bind to (default: "127.0.0.1") - `port`: Port to bind to (default: 9000) - `version`: Version of the agent (default: "0.0.1") diff --git a/docs/user-guide/concepts/multi-agent/graph.md b/docs/user-guide/concepts/multi-agent/graph.md index f03625e6..82deaebd 100644 --- a/docs/user-guide/concepts/multi-agent/graph.md +++ b/docs/user-guide/concepts/multi-agent/graph.md @@ -137,7 +137,7 @@ def only_if_research_successful(state): builder.add_edge("research", "analysis", condition=only_if_research_successful) ``` -When multiple conditional edges converge on a single node, the target node executes as soon as any one of the incoming conditional edges is satisfied. The node doesn't wait for all predecessor nodes to complete, just the first one whose condition evaluates to true. +When multiple conditional edges converge on a single node, the target node executes as soon as the condition of any one of the incoming conditional edges is satisfied. The node doesn't wait for all predecessor nodes to complete, just the first one whose condition evaluates to true. ## Nested Multi-Agent Patterns diff --git a/docs/user-guide/concepts/tools/executors.md b/docs/user-guide/concepts/tools/executors.md index 8b4d3c13..4b4353b1 100644 --- a/docs/user-guide/concepts/tools/executors.md +++ b/docs/user-guide/concepts/tools/executors.md @@ -23,7 +23,7 @@ Assuming the model returns `weather_tool` and `time_tool` use requests, the `Con ### Sequential Behavior -On certain prompts, the model may decide to return one tool use request at a time. Under these circumstances, the tools will execute sequentially. Concurrency is only achieved if the model returns multiple tool use requests in a single response. Certain models however offer additional abilities to coherce a desired behavior. For example, Anthropic exposes an explicit parallel tool use setting ([docs](https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/implement-tool-use#parallel-tool-use)). +On certain prompts, the model may decide to return one tool use request at a time. Under these circumstances, the tools will execute sequentially. Concurrency is only achieved if the model returns multiple tool use requests in a single response. Certain models however offer additional abilities to coerce a desired behavior. For example, Anthropic exposes an explicit parallel tool use setting ([docs](https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/implement-tool-use#parallel-tool-use)). 
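As a concrete illustration of that provider-level setting, here is a minimal sketch using the `anthropic` Python SDK directly (the model id and tool schema are placeholders; `disable_parallel_tool_use` is the flag the linked documentation describes):

```python
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment

weather_tool = {
    "name": "weather_tool",
    "description": "Get the current weather for a city.",
    "input_schema": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
}

response = client.messages.create(
    model="claude-3-5-sonnet-20241022",  # placeholder model id
    max_tokens=1024,
    tools=[weather_tool],
    # False (the default) leaves the model free to return several tool use
    # requests in one response; True forces at most one per response.
    tool_choice={"type": "auto", "disable_parallel_tool_use": False},
    messages=[{"role": "user", "content": "What are the weather and local time in Sydney?"}],
)
```
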
## Sequential Executor diff --git a/docs/user-guide/deploy/operating-agents-in-production.md b/docs/user-guide/deploy/operating-agents-in-production.md index 97e95e06..4da83729 100644 --- a/docs/user-guide/deploy/operating-agents-in-production.md +++ b/docs/user-guide/deploy/operating-agents-in-production.md @@ -133,10 +133,10 @@ Built-in guides are available for several AWS services: For production deployments, implement comprehensive monitoring: -1. **Tool Execution Metrics**: Monitor execution time and error rates for each tool -2. **Token Usage**: Track token consumption for cost optimization -3. **Response Times**: Monitor end-to-end response times -4. **Error Rates**: Track and alert on agent errors +1. **Tool Execution Metrics**: Monitor execution time and error rates for each tool. +2. **Token Usage**: Track token consumption for cost optimization. +3. **Response Times**: Monitor end-to-end response times. +4. **Error Rates**: Track and alert on agent errors. Consider integrating with AWS CloudWatch for metrics collection and alerting. diff --git a/docs/user-guide/observability-evaluation/logs.md b/docs/user-guide/observability-evaluation/logs.md index 2607744c..51b7f695 100644 --- a/docs/user-guide/observability-evaluation/logs.md +++ b/docs/user-guide/observability-evaluation/logs.md @@ -153,7 +153,7 @@ In addition to standard logging, Strands Agents SDK provides a callback system f - **Logging**: Internal operations, debugging, errors (not typically visible to end users) - **Callbacks**: User-facing output, streaming responses, tool execution notifications -The callback system is configured through the `callback_handler` parameter when creating an Agent: +The callback system is configured through the `callback_handler` parameter when creating an agent: ```python from strands.handlers.callback_handler import PrintingCallbackHandler @@ -168,7 +168,7 @@ You can create custom callback handlers to process streaming events according to ## Best Practices -1. **Configure Early**: Set up logging configuration before initializing the Agent +1. **Configure Early**: Set up logging configuration before initializing the agent 2. **Appropriate Levels**: Use INFO for normal operation and DEBUG for troubleshooting 3. **Structured Log Format**: Use the structured log format shown in examples for better parsing 4. **Performance**: Be mindful of logging overhead in production environments diff --git a/docs/user-guide/observability-evaluation/observability.md b/docs/user-guide/observability-evaluation/observability.md index ed552692..b8c7ac29 100644 --- a/docs/user-guide/observability-evaluation/observability.md +++ b/docs/user-guide/observability-evaluation/observability.md @@ -97,4 +97,4 @@ With these components in place, a continuous improvement flywheel emerges which ## Conclusion -Effective observability is crucial for developing agents which reliably complete customers’ tasks. The key to success is treating observability not as an afterthought, but as a core component of agent engineering from day one. This investment will pay dividends in improved reliability, faster development cycles, and better customer experiences. +Effective observability is crucial for developing agents that reliably complete customers’ tasks. The key to success is treating observability not as an afterthought, but as a core component of agent engineering from day one. This investment will pay dividends in improved reliability, faster development cycles, and better customer experiences. 
diff --git a/docs/user-guide/safety-security/pii-redaction.md b/docs/user-guide/safety-security/pii-redaction.md index ec35802e..58aa3873 100644 --- a/docs/user-guide/safety-security/pii-redaction.md +++ b/docs/user-guide/safety-security/pii-redaction.md @@ -85,7 +85,7 @@ print(result) langfuse.flush() ``` -#### Complete example with a Strands Agent +#### Complete example with a Strands agent ```python from strands import Agent From 0cc1a4b5b21a31b6b27b0a4195970bc6372198be Mon Sep 17 00:00:00 2001 From: aedpooji Date: Fri, 7 Nov 2025 19:32:32 -0500 Subject: [PATCH 2/4] Issue 287- strand-deepgram detailed documentation added --- .../user-guide/concepts/streaming/overview.md | 2 +- .../concepts/tools/strands-deepgram.md | 413 ++++++++++++++++++ 2 files changed, 414 insertions(+), 1 deletion(-) create mode 100644 docs/user-guide/concepts/tools/strands-deepgram.md diff --git a/docs/user-guide/concepts/streaming/overview.md b/docs/user-guide/concepts/streaming/overview.md index f0093b8f..6e4c6221 100644 --- a/docs/user-guide/concepts/streaming/overview.md +++ b/docs/user-guide/concepts/streaming/overview.md @@ -69,7 +69,7 @@ from strands import Agent from strands_tools import calculator def process_event(event): - """Shared event processor for both async iterators and callback handlers""" + """Shared event processor for both async iterators and callback handlers.""" # Track event loop lifecycle if event.get("init_event_loop", False): print("πŸ”„ Event loop initialized") diff --git a/docs/user-guide/concepts/tools/strands-deepgram.md b/docs/user-guide/concepts/tools/strands-deepgram.md new file mode 100644 index 00000000..962275da --- /dev/null +++ b/docs/user-guide/concepts/tools/strands-deepgram.md @@ -0,0 +1,413 @@ +# strands-deepgram + +strands-deepgram extends Strands Agents with advanced speech and audio processing using Deepgram's API. +This tool empowers agents to: + +* Transcribe audio with support for 30+ languages and speaker diarization +* Generate natural-sounding speech with multiple voice options +* Perform sentiment analysis, topic detection, and intent recognition + +It provides type-safe, modular integration with the Strands ecosystem. +Deepgram is a leading AI-powered speech recognition and audio intelligence platform that provides speech to text, text to speech and audio intelligence capabilities for AI agents + +## Installation + +Install the strands-deepgram package by running + +```bash +pip install strands-deepgram +``` + +### Dependencies + +strands-deepgram requires the following packages: + +* deepgram-sdk>=3.0 +* requests +* rich (for enhanced console output)(optional) +* strands>=1.11.0 + +If not installed automatically, you can manually install dependencies: + +```bash +pip install deepgram-sdk>=3.0 requests rich strands>=1.11.0 +``` + +## Configuration + +### Environment Variables + +| Variable | Required | Default | Description | +|----------|----------|---------|-------------| +| DEEPGRAM_API_KEY | βœ… | - | Your Deepgram API key (get it from console.deepgram.com) | +| DEEPGRAM_DEFAULT_MODEL | ❌ | nova-3 | Default speech-to-text model | +| DEEPGRAM_DEFAULT_LANGUAGE | ❌ | en | Default language code | + +Set up your API key: + +```bash +export DEEPGRAM_API_KEY=your_deepgram_api_key +export DEEPGRAM_DEFAULT_MODEL=nova-3 +export DEEPGRAM_DEFAULT_LANGUAGE=en +``` + +## Agent level usage + +### πŸŽ™οΈ Speech-to-Text with Deepgram Tool Integration + +Use the Deepgram tool to transcribe and analyze speech with speaker identification. 
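Before the first call, confirm that the API key from the Configuration section is visible to your process; a small guard such as the following sketch (using only the documented `DEEPGRAM_API_KEY` variable) fails fast with a clear message:

```python
import os

# Fail fast if the documented environment variable is missing
if not os.environ.get("DEEPGRAM_API_KEY"):
    raise RuntimeError("DEEPGRAM_API_KEY is not set; export it before using the deepgram tool")
```

With the key in place, the agent form is a single natural-language request:
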
+ +```python +from strands import Agent +from strands_deepgram import deepgram + +# Create an agent with Deepgram tool +agent = Agent(tools=[deepgram]) + +# Transcribe audio with speaker identification +agent("transcribe audio from recording.mp3 in Turkish with speaker diarization") +``` + +βœ… Pros: Simple, intuitive, no need to know exact parameters + +### Programmatic Tool Invocation + +The Deepgram tool can also be used directly in Python, outside of agent workflows: + +```python +from strands_deepgram import deepgram + +# Speech-to-Text (Transcription) +result = deepgram( + action="transcribe", + audio_source="meeting.mp3", + language="en", + model="nova-3", + options={ + "diarize": True, + "smart_format": True, + "sentiment": True, + "topics": True + } +) +``` + +### Transcription Options + +| Option | Description | +|--------|-------------| +| diarize | Enable speaker identification and separation in multi-speaker audio | +| smart_format | Apply intelligent formatting including punctuation, capitalization, and number formatting | +| sentiment | Analyze emotional tone and sentiment of spoken content | +| topics | Automatically detect and categorize discussion topics | +| punctuate | Add punctuation marks to improve transcript readability | +| utterances | Include detailed utterance-level information with timestamps | +| detect_language | Automatically identify the spoken language | +| intents | Detect speaker intentions and classify speech acts | + +### πŸ”Š Text-to-Speech with Deepgram Tool Integration + +Use the Deepgram tool to convert text to natural-sounding speech. + +```python +from strands import Agent +from strands_deepgram import deepgram + +# Create an agent with Deepgram tool +agent = Agent(tools=[deepgram]) + +# Text-to-speech +agent("convert this text to speech and save as output.mp3: Hello world") +# Audio intelligence +agent("analyze sentiment and topics in recording.wav") +``` + +### Programmatic Tool Invocation + +The Deepgram tool can also be used directly in Python, outside of agent workflows: + +```python +from strands_deepgram import deepgram + +result = deepgram( + action="text_to_speech", + text="Hello, welcome to our service!", + options={ + "voice": "aura-asteria-en", # Select voice model + "encoding": "mp3", # Audio format + "output_path": "greeting.mp3", # Save location + "play_audio": True # Auto-play generated audio + } +) +``` + +### Text-to-Speech Options + +| Option | Description | +|--------|-------------| +| voice | Select from Aura voice models (asteria, luna, stella, athena, etc.) | +| encoding | Choose audio format (mp3, wav, flac, opus) | +| sample_rate | Set audio quality in Hz (8000–48000) | +| output_path | Specify file location to save generated audio | +| play_audio | Automatically play audio after generation (boolean) | + +### 🧠 Audio Intelligence with Deepgram Tool Integration + +Use the Deepgram tool to detect sentiment and extract topics from audio recordings. + +```python +from strands import Agent +from strands_deepgram import deepgram + +# Create an agent with Deepgram tool +agent = Agent(tools=[deepgram]) + +# Audio intelligence +agent("analyze sentiment and topics in recording.wav") +``` + +## Examples + +### Basic Transcription + +Use the agent to convert audio to plain text quickly with no punctuation, speaker labels, or analysis. + +```python +agent("transcribe this audio file: path/to/audio.mp3") +``` + +### Audio Intelligence (Deepgram) + +Use the Deepgram tool to detect sentiment and extract topics from audio recordings. 
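The same analysis is also available programmatically by passing the documented `sentiment` and `topics` options to a `transcribe` call, mirroring the programmatic example earlier on this page:

```python
from strands_deepgram import deepgram

# Request sentiment and topic analysis alongside the transcript
result = deepgram(
    action="transcribe",
    audio_source="recording.wav",
    options={"sentiment": True, "topics": True},
)
```

The agent prompt form:
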
+ +```python +agent("analyze sentiment and topics in recording.wav") +``` + +### Multi-Language Transcription + +Use the agent to transcribe audio in multiple languages automatically. + +```python +agent("transcribe audio.wav in Spanish") +``` + +### Speaker Diarization + +Use the agent to identify and label different speakers in a conversation. + +```python +agent("transcribe meeting.mp3 with speaker identification") +``` + +### Punctuation & Formatting + +Use the agent to add punctuation, capitalization, and number formatting for readability. + +```python +agent("transcribe call.wav with smart formatting") +``` + +### Basic Text-to-Speech + +Use the agent to convert text to natural-sounding speech. + +```python +agent("convert this text to speech: Hello, how are you today?") +``` + +### Save to Specific Format + +Use the agent to save spoken text as an audio file. + +```python +agent("convert text to speech and save as greeting.wav: Welcome to our service") +``` + +### Custom Voice Selection + +Use the agent to choose a voice for speech output. + +```python +agent("use Aura voice to say: Thank you for your patience") +``` + +### Sentiment Analysis + +Use the agent to detect emotions in audio. + +```python +agent("analyze sentiment in customer_call.mp3") +``` + +### Topic Detection + +Use the agent to identify discussion topics in audio. + +```python +agent("identify topics discussed in meeting.wav") +``` + +### Combined Analysis + +Use the agent to analyze both sentiment and topics at once. + +```python +agent("analyze sentiment and topics in audio: call.mp3") +``` + +### Intent Recognition + +Use the agent to detect customer intent from audio conversations. + +```python +agent("detect customer intent in support_call.mp3") +``` + +## URL Support + +Transcribe or analyze audio directly from online sources by providing the file URL. + +```python +# Remote audio file +agent("transcribe https://example.com/audio.mp3") +``` + +Returns raw transcript text without speaker labels or punctuation. + +```python +# With custom options +result = deepgram( + action="transcribe", + audio_source="https://example.com/meeting.wav", + language="es", + options={"diarize": True, "sentiment": True} +) +``` + +Returns structured JSON with transcript, speaker labels, and sentiment scores. + +## Batch Processing + +Process multiple audio files at once to save time. + +```python +# Transcribe all audio files in a folder +agent("transcribe all audio files in the recordings/ folder") +``` + +Returns a list of plain text transcripts, one per file. + +```python +# Analyze sentiment for multiple files +agent("analyze sentiment for all files: call1.mp3, call2.mp3, call3.mp3") +``` + +Returns a list of sentiment scores and topic tags for each audio file. + +## Custom Parameters + +Use specific models, voices, or advanced options to control output quality and style. + +```python +# Transcribe using a specific model with punctuation +agent("transcribe audio.mp3 using nova-2 model with punctuation enabled") +``` + +Returns formatted transcript text with punctuation and capitalization. + +```python +# Generate speech with slow speed +agent("generate speech with slow speed: Welcome to our platform") +``` + +Returns an audio file (e.g., .wav) with the generated speech. 
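## Combining Actions

The documented actions also compose into simple pipelines. The sketch below chains a transcription into a spoken confirmation; as in the earlier examples, it assumes the `transcribe` result can be rendered with `str()` (the exact return shape is an assumption):

```python
from strands_deepgram import deepgram

# Step 1: transcribe the source audio with sentiment analysis enabled
analysis = deepgram(
    action="transcribe",
    audio_source="customer_call.mp3",
    options={"sentiment": True, "smart_format": True},
)

# Step 2: speak a short confirmation (str(analysis) is an assumed rendering)
deepgram(
    action="text_to_speech",
    text=f"Analysis complete. {str(analysis)[:200]}",
    options={"voice": "aura-asteria-en", "output_path": "summary.mp3"},
)
```
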
+ +## Best Practices + +**Tool Descriptions**: Provide clear action parameters (`transcribe`, `text_to_speech`, `analyze`) to help agents understand when and how to use Deepgram capabilities + +**Parameter Types**: Use appropriate parameter types and descriptions to ensure correct tool usage: +- `audio_source`: string (file path or URL) +- `language`: string (ISO language code) +- `options`: object (configuration parameters) + +**Error Handling**: Return informative error messages when tools fail to execute properly + +**Security**: Consider security implications when handling audio files and API keys, especially in production environments + +**Connection Management**: Always validate API connectivity before processing large batches of audio files + +**Timeouts**: Set appropriate timeouts for large file processing to prevent hanging on long-running operations + +```python +# Timeout configuration +result = deepgram( + action="transcribe", + audio_source="large_file.wav", + options={"timeout": 300} # 5 minute timeout +) +``` + +## Troubleshooting + +### API Authentication Errors + +API authentication errors occur when the Deepgram API key is invalid or missing. To resolve these issues, first ensure that your API key is properly set in the environment variables. You should also verify that the API key has the necessary permissions for the operations you're attempting to perform. + +```bash +# Verify API key is set +echo $DEEPGRAM_API_KEY + +# Test API key validity +curl -H "Authorization: Token $DEEPGRAM_API_KEY" https://api.deepgram.com/v1/projects +``` + +### Audio Format Issues + +Audio format problems arise when using unsupported file formats or corrupted audio files. To resolve these errors, verify that your audio file is in a supported format (mp3, wav, flac, opus, m4a, webm). When encountering format issues, converting the file to a supported format often resolves the problem. + +```python +# Convert unsupported formats +import subprocess +subprocess.run(["ffmpeg", "-i", "input.mov", "output.wav"]) +``` + +### Rate Limiting Errors + +Rate limiting occurs when you exceed Deepgram's API request limits. To resolve these issues, implement exponential backoff and retry logic in your application. You should also consider batching requests and adding delays between API calls to stay within rate limits. + +```python +import time + +def transcribe_with_retry(audio_path, max_retries=3): + for attempt in range(max_retries): + try: + return deepgram(action="transcribe", audio_source=audio_path) + except Exception as e: + if "rate limit" in str(e).lower() and attempt < max_retries - 1: + time.sleep(2 ** attempt) # Exponential backoff + continue + raise +``` + +### Large File Processing Errors + +Large file processing errors can occur when files exceed size limits or processing timeouts. To resolve these issues, consider using streaming mode for files larger than 200MB. You should also verify that your network connection is stable for the duration of the upload and processing. 
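A pre-flight size check can route oversized files to the streaming path before any upload begins (the 200MB threshold mirrors the guidance above):

```python
import os

# 200MB threshold, per the guidance above
if os.path.getsize("large_file.wav") > 200 * 1024 * 1024:
    print("File exceeds 200MB; use the streaming options shown below")
```

The streaming request itself:
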
+ +```python +# Handle large files with streaming +result = deepgram( + action="transcribe", + audio_source="large_file.wav", + options={"streaming": True, "chunk_size": 8192} +) +``` + +## πŸ”— Links + +* PyPI: strands-deepgram +* GitHub: strands-deepgram +* Strands Agents SDK: github.com/strands-agents/strands +* Deepgram API: developers.deepgram.com +* Deepgram Console: console.deepgram.com From 45550b68fb7fe0eaccbc6773d5229d636f7e8288 Mon Sep 17 00:00:00 2001 From: aedpooji Date: Sat, 8 Nov 2025 11:19:51 -0500 Subject: [PATCH 3/4] Issue 287- strand-deepgram detailed documentation added --- .../concepts/tools/strands-deepgram.md | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/user-guide/concepts/tools/strands-deepgram.md b/docs/user-guide/concepts/tools/strands-deepgram.md index 962275da..7e3a2664 100644 --- a/docs/user-guide/concepts/tools/strands-deepgram.md +++ b/docs/user-guide/concepts/tools/strands-deepgram.md @@ -1,6 +1,6 @@ -# strands-deepgram +# Strands Deepgram -strands-deepgram extends Strands Agents with advanced speech and audio processing using Deepgram's API. +Strands Deepgram extends Strands Agents with advanced speech and audio processing using Deepgram's API. This tool empowers agents to: * Transcribe audio with support for 30+ languages and speaker diarization @@ -8,11 +8,11 @@ This tool empowers agents to: * Perform sentiment analysis, topic detection, and intent recognition It provides type-safe, modular integration with the Strands ecosystem. -Deepgram is a leading AI-powered speech recognition and audio intelligence platform that provides speech to text, text to speech and audio intelligence capabilities for AI agents +Deepgram is a leading AI-powered speech recognition and audio intelligence platform that provides speech-to-text, text-to-speech, and audio intelligence capabilities for AI agents. 
## Installation -Install the strands-deepgram package by running +Install the Strands Deepgram package by running ```bash pip install strands-deepgram @@ -20,11 +20,11 @@ pip install strands-deepgram ### Dependencies -strands-deepgram requires the following packages: +Strands Deepgram requires the following packages: * deepgram-sdk>=3.0 * requests -* rich (for enhanced console output)(optional) +* rich (for enhanced console output) (optional) * strands>=1.11.0 If not installed automatically, you can manually install dependencies: @@ -39,9 +39,9 @@ pip install deepgram-sdk>=3.0 requests rich strands>=1.11.0 | Variable | Required | Default | Description | |----------|----------|---------|-------------| -| DEEPGRAM_API_KEY | βœ… | - | Your Deepgram API key (get it from console.deepgram.com) | -| DEEPGRAM_DEFAULT_MODEL | ❌ | nova-3 | Default speech-to-text model | -| DEEPGRAM_DEFAULT_LANGUAGE | ❌ | en | Default language code | +| **DEEPGRAM_API_KEY** | βœ… | - | Your Deepgram API key (get it from console.deepgram.com) | +| **DEEPGRAM_DEFAULT_MODEL** | ❌ | **nova-3** | Default speech-to-text model | +| **DEEPGRAM_DEFAULT_LANGUAGE** | ❌ | **en** | Default language code | Set up your API key: @@ -51,7 +51,7 @@ export DEEPGRAM_DEFAULT_MODEL=nova-3 export DEEPGRAM_DEFAULT_LANGUAGE=en ``` -## Agent level usage +## Agent Level Usage ### πŸŽ™οΈ Speech-to-Text with Deepgram Tool Integration @@ -96,14 +96,14 @@ result = deepgram( | Option | Description | |--------|-------------| -| diarize | Enable speaker identification and separation in multi-speaker audio | -| smart_format | Apply intelligent formatting including punctuation, capitalization, and number formatting | -| sentiment | Analyze emotional tone and sentiment of spoken content | -| topics | Automatically detect and categorize discussion topics | -| punctuate | Add punctuation marks to improve transcript readability | -| utterances | Include detailed utterance-level information with timestamps | -| detect_language | Automatically identify the spoken language | -| intents | Detect speaker intentions and classify speech acts | +| **diarize** | Enable speaker identification and separation in multi-speaker audio | +| **smart_format** | Apply intelligent formatting including punctuation, capitalization, and number formatting | +| **sentiment** | Analyze emotional tone and sentiment of spoken content | +| **topics** | Automatically detect and categorize discussion topics | +| **punctuate** | Add punctuation marks to improve transcript readability | +| **utterances** | Include detailed utterance-level information with timestamps | +| **detect_language** | Automatically identify the spoken language | +| **intents** | Detect speaker intentions and classify speech acts | ### πŸ”Š Text-to-Speech with Deepgram Tool Integration @@ -145,11 +145,11 @@ result = deepgram( | Option | Description | |--------|-------------| -| voice | Select from Aura voice models (asteria, luna, stella, athena, etc.) | -| encoding | Choose audio format (mp3, wav, flac, opus) | -| sample_rate | Set audio quality in Hz (8000–48000) | -| output_path | Specify file location to save generated audio | -| play_audio | Automatically play audio after generation (boolean) | +| **voice** | Select from Aura voice models (asteria, luna, stella, athena, etc.) 
| +| **encoding** | Choose audio format (mp3, wav, flac, opus) | +| **sample_rate** | Set audio quality in Hz (8000–48000) | +| **output_path** | Specify file location to save generated audio | +| **play_audio** | Automatically play audio after generation (boolean) | ### 🧠 Audio Intelligence with Deepgram Tool Integration From f47c1c141e4603eaec9c27e717ce711e7e9b8cd6 Mon Sep 17 00:00:00 2001 From: aedpooji Date: Sat, 8 Nov 2025 13:05:52 -0500 Subject: [PATCH 4/4] Issue 287- strand-deepgram detailed documentation latest --- .../agent.md | 0 .../event-loop.md | 0 .../experimental.md | 0 .../handlers.md | 0 .../hooks.md | 0 .../interrupt.md | 0 .../models.md | 0 .../multiagent.md | 0 .../session.md | 0 .../telemetry.md | 0 .../tools.md | 0 .../types.md | 0 .../concepts/tools/strands-deepgram.md | 72 ++++++++++++++++++- mkdocs.yml | 32 +++++---- 14 files changed, 87 insertions(+), 17 deletions(-) rename docs/{api-reference => api-reference-disabled}/agent.md (100%) rename docs/{api-reference => api-reference-disabled}/event-loop.md (100%) rename docs/{api-reference => api-reference-disabled}/experimental.md (100%) rename docs/{api-reference => api-reference-disabled}/handlers.md (100%) rename docs/{api-reference => api-reference-disabled}/hooks.md (100%) rename docs/{api-reference => api-reference-disabled}/interrupt.md (100%) rename docs/{api-reference => api-reference-disabled}/models.md (100%) rename docs/{api-reference => api-reference-disabled}/multiagent.md (100%) rename docs/{api-reference => api-reference-disabled}/session.md (100%) rename docs/{api-reference => api-reference-disabled}/telemetry.md (100%) rename docs/{api-reference => api-reference-disabled}/tools.md (100%) rename docs/{api-reference => api-reference-disabled}/types.md (100%) diff --git a/docs/api-reference/agent.md b/docs/api-reference-disabled/agent.md similarity index 100% rename from docs/api-reference/agent.md rename to docs/api-reference-disabled/agent.md diff --git a/docs/api-reference/event-loop.md b/docs/api-reference-disabled/event-loop.md similarity index 100% rename from docs/api-reference/event-loop.md rename to docs/api-reference-disabled/event-loop.md diff --git a/docs/api-reference/experimental.md b/docs/api-reference-disabled/experimental.md similarity index 100% rename from docs/api-reference/experimental.md rename to docs/api-reference-disabled/experimental.md diff --git a/docs/api-reference/handlers.md b/docs/api-reference-disabled/handlers.md similarity index 100% rename from docs/api-reference/handlers.md rename to docs/api-reference-disabled/handlers.md diff --git a/docs/api-reference/hooks.md b/docs/api-reference-disabled/hooks.md similarity index 100% rename from docs/api-reference/hooks.md rename to docs/api-reference-disabled/hooks.md diff --git a/docs/api-reference/interrupt.md b/docs/api-reference-disabled/interrupt.md similarity index 100% rename from docs/api-reference/interrupt.md rename to docs/api-reference-disabled/interrupt.md diff --git a/docs/api-reference/models.md b/docs/api-reference-disabled/models.md similarity index 100% rename from docs/api-reference/models.md rename to docs/api-reference-disabled/models.md diff --git a/docs/api-reference/multiagent.md b/docs/api-reference-disabled/multiagent.md similarity index 100% rename from docs/api-reference/multiagent.md rename to docs/api-reference-disabled/multiagent.md diff --git a/docs/api-reference/session.md b/docs/api-reference-disabled/session.md similarity index 100% rename from docs/api-reference/session.md rename to 
docs/api-reference-disabled/session.md diff --git a/docs/api-reference/telemetry.md b/docs/api-reference-disabled/telemetry.md similarity index 100% rename from docs/api-reference/telemetry.md rename to docs/api-reference-disabled/telemetry.md diff --git a/docs/api-reference/tools.md b/docs/api-reference-disabled/tools.md similarity index 100% rename from docs/api-reference/tools.md rename to docs/api-reference-disabled/tools.md diff --git a/docs/api-reference/types.md b/docs/api-reference-disabled/types.md similarity index 100% rename from docs/api-reference/types.md rename to docs/api-reference-disabled/types.md diff --git a/docs/user-guide/concepts/tools/strands-deepgram.md b/docs/user-guide/concepts/tools/strands-deepgram.md index 7e3a2664..307df486 100644 --- a/docs/user-guide/concepts/tools/strands-deepgram.md +++ b/docs/user-guide/concepts/tools/strands-deepgram.md @@ -118,8 +118,6 @@ agent = Agent(tools=[deepgram]) # Text-to-speech agent("convert this text to speech and save as output.mp3: Hello world") -# Audio intelligence -agent("analyze sentiment and topics in recording.wav") ``` ### Programmatic Tool Invocation @@ -173,6 +171,10 @@ agent("analyze sentiment and topics in recording.wav") Use the agent to convert audio to plain text quickly with no punctuation, speaker labels, or analysis. ```python +from strands import Agent +from strands_deepgram import deepgram + +agent = Agent(tools=[deepgram]) agent("transcribe this audio file: path/to/audio.mp3") ``` @@ -181,6 +183,10 @@ agent("transcribe this audio file: path/to/audio.mp3") Use the Deepgram tool to detect sentiment and extract topics from audio recordings. ```python +from strands import Agent +from strands_deepgram import deepgram + +agent = Agent(tools=[deepgram]) agent("analyze sentiment and topics in recording.wav") ``` @@ -189,6 +195,10 @@ agent("analyze sentiment and topics in recording.wav") Use the agent to transcribe audio in multiple languages automatically. ```python +from strands import Agent +from strands_deepgram import deepgram + +agent = Agent(tools=[deepgram]) agent("transcribe audio.wav in Spanish") ``` @@ -197,6 +207,10 @@ agent("transcribe audio.wav in Spanish") Use the agent to identify and label different speakers in a conversation. ```python +from strands import Agent +from strands_deepgram import deepgram + +agent = Agent(tools=[deepgram]) agent("transcribe meeting.mp3 with speaker identification") ``` @@ -205,6 +219,10 @@ agent("transcribe meeting.mp3 with speaker identification") Use the agent to add punctuation, capitalization, and number formatting for readability. ```python +from strands import Agent +from strands_deepgram import deepgram + +agent = Agent(tools=[deepgram]) agent("transcribe call.wav with smart formatting") ``` @@ -213,6 +231,10 @@ agent("transcribe call.wav with smart formatting") Use the agent to convert text to natural-sounding speech. ```python +from strands import Agent +from strands_deepgram import deepgram + +agent = Agent(tools=[deepgram]) agent("convert this text to speech: Hello, how are you today?") ``` @@ -221,6 +243,10 @@ agent("convert this text to speech: Hello, how are you today?") Use the agent to save spoken text as an audio file. 
```python +from strands import Agent +from strands_deepgram import deepgram + +agent = Agent(tools=[deepgram]) agent("convert text to speech and save as greeting.wav: Welcome to our service") ``` @@ -229,6 +255,10 @@ agent("convert text to speech and save as greeting.wav: Welcome to our service") Use the agent to choose a voice for speech output. ```python +from strands import Agent +from strands_deepgram import deepgram + +agent = Agent(tools=[deepgram]) agent("use Aura voice to say: Thank you for your patience") ``` @@ -237,6 +267,10 @@ agent("use Aura voice to say: Thank you for your patience") Use the agent to detect emotions in audio. ```python +from strands import Agent +from strands_deepgram import deepgram + +agent = Agent(tools=[deepgram]) agent("analyze sentiment in customer_call.mp3") ``` @@ -245,6 +279,10 @@ agent("analyze sentiment in customer_call.mp3") Use the agent to identify discussion topics in audio. ```python +from strands import Agent +from strands_deepgram import deepgram + +agent = Agent(tools=[deepgram]) agent("identify topics discussed in meeting.wav") ``` @@ -253,6 +291,10 @@ agent("identify topics discussed in meeting.wav") Use the agent to analyze both sentiment and topics at once. ```python +from strands import Agent +from strands_deepgram import deepgram + +agent = Agent(tools=[deepgram]) agent("analyze sentiment and topics in audio: call.mp3") ``` @@ -261,6 +303,10 @@ agent("analyze sentiment and topics in audio: call.mp3") Use the agent to detect customer intent from audio conversations. ```python +from strands import Agent +from strands_deepgram import deepgram + +agent = Agent(tools=[deepgram]) agent("detect customer intent in support_call.mp3") ``` @@ -269,13 +315,19 @@ agent("detect customer intent in support_call.mp3") Transcribe or analyze audio directly from online sources by providing the file URL. ```python +from strands import Agent +from strands_deepgram import deepgram + # Remote audio file +agent = Agent(tools=[deepgram]) agent("transcribe https://example.com/audio.mp3") ``` Returns raw transcript text without speaker labels or punctuation. ```python +from strands_deepgram import deepgram + # With custom options result = deepgram( action="transcribe", @@ -292,14 +344,22 @@ Returns structured JSON with transcript, speaker labels, and sentiment scores. Process multiple audio files at once to save time. ```python +from strands import Agent +from strands_deepgram import deepgram + # Transcribe all audio files in a folder +agent = Agent(tools=[deepgram]) agent("transcribe all audio files in the recordings/ folder") ``` Returns a list of plain text transcripts, one per file. ```python +from strands import Agent +from strands_deepgram import deepgram + # Analyze sentiment for multiple files +agent = Agent(tools=[deepgram]) agent("analyze sentiment for all files: call1.mp3, call2.mp3, call3.mp3") ``` @@ -310,14 +370,22 @@ Returns a list of sentiment scores and topic tags for each audio file. Use specific models, voices, or advanced options to control output quality and style. ```python +from strands import Agent +from strands_deepgram import deepgram + # Transcribe using a specific model with punctuation +agent = Agent(tools=[deepgram]) agent("transcribe audio.mp3 using nova-2 model with punctuation enabled") ``` Returns formatted transcript text with punctuation and capitalization. 
```python +from strands import Agent +from strands_deepgram import deepgram + # Generate speech with slow speed +agent = Agent(tools=[deepgram]) agent("generate speech with slow speed: Welcome to our platform") ``` diff --git a/mkdocs.yml b/mkdocs.yml index ec2b1650..21df50a3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -88,6 +88,7 @@ nav: - Model Context Protocol (MCP): user-guide/concepts/tools/mcp-tools.md - Executors: user-guide/concepts/tools/executors.md - Community Tools Package: user-guide/concepts/tools/community-tools-package.md + - Strands Deepgram: user-guide/concepts/tools/strands-deepgram.md - Model Providers: - Amazon Bedrock: user-guide/concepts/model-providers/amazon-bedrock.md - Anthropic: user-guide/concepts/model-providers/anthropic.md @@ -164,24 +165,25 @@ nav: - Amazon AgentCore Memory: community/session-managers/agentcore-memory.md - Contribute ❀️: https://github.com/strands-agents/sdk-python/blob/main/CONTRIBUTING.md - - API Reference: - - Agent: api-reference/agent.md - - Event Loop: api-reference/event-loop.md - - Experimental: api-reference/experimental.md - - Handlers: api-reference/handlers.md - - Hooks: api-reference/hooks.md - - Interrupt: api-reference/interrupt.md - - Models: api-reference/models.md - - Multiagent: api-reference/multiagent.md - - Session: api-reference/session.md - - Telemetry: api-reference/telemetry.md - - Tools: api-reference/tools.md - - Types: api-reference/types.md + # - API Reference: + # - Agent: api-reference/agent.md + # - Event Loop: api-reference/event-loop.md + # - Experimental: api-reference/experimental.md + # - Handlers: api-reference/handlers.md + # - Hooks: api-reference/hooks.md + # - Interrupt: api-reference/interrupt.md + # - Models: api-reference/models.md + # - Multiagent: api-reference/multiagent.md + # - Session: api-reference/session.md + # - Telemetry: api-reference/telemetry.md + # - Tools: api-reference/tools.md + # - Types: api-reference/types.md exclude_docs: | node_modules .venv _dependencies + api-reference-disabled plugins: - search @@ -207,8 +209,8 @@ plugins: - community/**/*.md Examples: - examples/**/*.md - API Reference: - - api-reference/*.md + # API Reference: + # - api-reference/*.md extra: social: