Skip to content

Commit 32c36b2

Browse files
authored
Add MCP-powered voice agent
Add MCP Voice Agent project
2 parents 8625e92 + bad0db5 commit 32c36b2

File tree

6 files changed

+409
-0
lines changed

6 files changed

+409
-0
lines changed

mcp-voice-agent/.env.example

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
LIVEKIT_URL=wss://YOUR-PROJECT.livekit.cloud
2+
LIVEKIT_API_KEY=your-key
3+
LIVEKIT_API_SECRET=your-secret
4+
OPENAI_API_KEY=your-key
5+
SUPABASE_ACCESS_TOKEN=your-token
6+
FIRECRAWL_API_KEY=your-key

mcp-voice-agent/.gitignore

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Virtual Environment
2+
venv/
3+
env/
4+
ENV/
5+
aai/
6+
7+
# Python
8+
__pycache__/
9+
*.py[cod]
10+
*$py.class
11+
*.so
12+
.Python
13+
build/
14+
develop-eggs/
15+
dist/
16+
downloads/
17+
eggs/
18+
.eggs/
19+
lib/
20+
lib64/
21+
parts/
22+
sdist/
23+
var/
24+
wheels/
25+
*.egg-info/
26+
.installed.cfg
27+
*.egg
28+
29+
# IDE
30+
.idea/
31+
.vscode/
32+
*.swp
33+
*.swo
34+
35+
# Environment variables
36+
.env
37+
.env.local
38+
39+
# Logs
40+
*.log

mcp-voice-agent/README.md

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# MCP-powered voice agent
2+
3+
This project implements a voice agent that combines web search capabilities via Firecrawl with Supabase database operations through MCP (Model Context Protocol).
4+
5+
## Installation
6+
7+
Ensure you have Python 3.x installed and run:
8+
9+
```bash
10+
pip install -r requirements.txt
11+
```
12+
13+
## Implementation: agent.py
14+
15+
This implementation uses AssemblyAI's services for speech-to-text, along with Firecrawl for web search and Supabase for database operations.
16+
17+
### Requirements
18+
19+
- Firecrawl API key
20+
- Supabase access token
21+
- OpenAI API key
22+
- AssemblyAI API key
23+
- LiveKit credentials
24+
25+
### Setup
26+
27+
Copy `.env.example` to `.env` and configure the following environment variables:
28+
29+
```
30+
FIRECRAWL_API_KEY=your_firecrawl_api_key
31+
SUPABASE_ACCESS_TOKEN=your_supabase_token
32+
OPENAI_API_KEY=your_openai_api_key
33+
ASSEMBLYAI_API_KEY=your_assemblyai_api_key
34+
LIVEKIT_URL=your_livekit_url
35+
LIVEKIT_API_KEY=your_livekit_api_key
36+
LIVEKIT_API_SECRET=your_livekit_api_secret
37+
```
38+
39+
### Running
40+
41+
Start the agent using:
42+
43+
```bash
44+
python agent.py
45+
```
46+
47+
The agent will:
48+
1. Connect to LiveKit
49+
2. Initialize the MCP server for Supabase integration
50+
3. Set up voice interaction capabilities
51+
4. Start listening for user input
52+
53+
## Features
54+
55+
- Real-time web search using Firecrawl
56+
- Supabase database integration via MCP
57+
- Voice interaction capabilities:
58+
- Silero VAD (Voice Activity Detection)
59+
- AssemblyAI Speech-to-Text
60+
- OpenAI GPT-4o for language processing
61+
- OpenAI TTS for text-to-speech
62+
63+
## 📬 Stay Updated with Our Newsletter!
64+
65+
**Get a FREE Data Science eBook** 📖 with 150+ essential lessons in Data Science when you subscribe to our newsletter! Stay in the loop with the latest tutorials, insights, and exclusive resources. [Subscribe now!](https://join.dailydoseofds.com)
66+
67+
[![Daily Dose of Data Science Newsletter](https://github.com/patchy631/ai-engineering/blob/main/resources/join_ddods.png)](https://join.dailydoseofds.com)
68+
69+
## Contribution
70+
71+
Contributions are welcome! Feel free to fork this repository and submit pull requests with your improvements.

mcp-voice-agent/agent.py

Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
"""
2+
MCP voice agent that routes queries either to Firecrawl web search or to Supabase via MCP.
3+
"""
4+
5+
from __future__ import annotations
6+
7+
import asyncio
8+
import copy
9+
import json
10+
import logging
11+
import os
12+
from typing import Any, Callable, List, Optional
13+
14+
import inspect
15+
from dotenv import load_dotenv
16+
from firecrawl import FirecrawlApp, ScrapeOptions
17+
from pydantic_ai.mcp import MCPServerStdio
18+
19+
from livekit.agents import (
20+
Agent,
21+
AgentSession,
22+
JobContext,
23+
RunContext,
24+
WorkerOptions,
25+
cli,
26+
function_tool,
27+
)
28+
from livekit.plugins import assemblyai, openai, silero
29+
30+
# ------------------------------------------------------------------------------
31+
# Configuration & Logging
32+
# ------------------------------------------------------------------------------
33+
# Pull variables from a local .env file into the process environment before
# any of them are read below.
load_dotenv()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Both credentials are mandatory: log the problem and abort at import time
# when either one is missing or empty.
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY")
if not FIRECRAWL_API_KEY:
    logger.error("FIRECRAWL_API_KEY is not set in environment.")
    raise EnvironmentError("Please set FIRECRAWL_API_KEY env var.")

SUPABASE_TOKEN = os.environ.get("SUPABASE_ACCESS_TOKEN")
if not SUPABASE_TOKEN:
    logger.error("SUPABASE_ACCESS_TOKEN is not set in environment.")
    raise EnvironmentError("Please set SUPABASE_ACCESS_TOKEN env var.")

# Shared Firecrawl client used by the web-search tool.
firecrawl_app = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
49+
50+
51+
def _py_type(schema: dict) -> Any:
52+
"""Convert JSON schema types into Python typing annotations."""
53+
t = schema.get("type")
54+
mapping = {
55+
"string": str,
56+
"integer": int,
57+
"number": float,
58+
"boolean": bool,
59+
"object": dict,
60+
}
61+
62+
if isinstance(t, list):
63+
if "array" in t:
64+
return List[_py_type(schema.get("items", {}))]
65+
t = t[0]
66+
67+
if isinstance(t, str) and t in mapping:
68+
return mapping[t]
69+
if t == "array":
70+
return List[_py_type(schema.get("items", {}))]
71+
72+
return Any
73+
74+
75+
def _schema_type_label(prop: dict) -> str:
    """Return a capitalised display name for a JSON-schema property type."""
    declared = prop.get("type", "Any")

    if isinstance(declared, list):
        # Ignore the "null" member of a union; optionality is expressed
        # separately through the schema's ``required`` list.
        concrete = [name for name in declared if name != "null"]
        if "array" in concrete:
            declared = "array"
        else:
            declared = concrete[0] if concrete else "Any"

    if declared == "array":
        items = prop.get("items")
        if not isinstance(items, dict):
            items = {}
        return f"List[{_schema_type_label(items)}]"

    if not isinstance(declared, str):
        return "Any"
    return declared.capitalize()


def schema_to_google_docstring(description: str, schema: dict) -> str:
    """Generate a Google-style docstring from a JSON schema.

    Args:
        description: Summary text placed on the first line (may be empty).
        schema: JSON schema whose ``properties`` and ``required`` keys
            describe the parameters.

    Returns:
        The description followed by an ``Args:`` section with one
        ``name (Type): description`` line per property. Properties that are
        not listed in ``required`` are rendered as ``Optional[Type]``.
    """
    props = schema.get("properties", {})
    required = set(schema.get("required", []))
    lines = [description or "", "Args:"]

    for name, prop in props.items():
        label = _schema_type_label(prop)
        if name not in required:
            label = f"Optional[{label}]"

        desc = prop.get("description", "")
        # Entries are indented under "Args:" as Google style expects.
        lines.append(f"    {name} ({label}): {desc}")

    return "\n".join(lines)
104+
105+
106+
@function_tool
async def firecrawl_search(
    context: RunContext,
    query: str,
    limit: int = 5,
) -> List[str]:
    """
    Search the web via Firecrawl.

    Args:
        context (RunContext): LiveKit runtime context.
        query (str): Search query string.
        limit (int): Maximum pages to crawl.

    Returns:
        List[str]: Page contents, one entry per crawled page. An empty list
        is returned when the crawl fails.
    """
    from urllib.parse import quote_plus

    # Encode the query so spaces, '&', '#', etc. cannot corrupt the URL.
    url = f"https://www.google.com/search?q={quote_plus(query)}"
    logger.debug("Starting Firecrawl for URL: %s (limit=%d)", url, limit)

    # The Firecrawl client call is blocking, so run it in the default executor.
    loop = asyncio.get_running_loop()
    try:
        result = await loop.run_in_executor(
            None,
            lambda: firecrawl_app.crawl_url(
                url,
                limit=limit,
                scrape_options=ScrapeOptions(formats=["text", "markdown"]),
            ),
        )
    except Exception as e:
        # Best effort: a failed search must not crash the voice session.
        logger.error("Firecrawl search failed: %s", e, exc_info=True)
        return []

    # NOTE(review): the response shape depends on the installed firecrawl-py
    # version (plain list, dict with "data", or an object with ``.data``) --
    # confirm against the pinned SDK. All three are accepted here so that
    # calling len() on an unsized response can no longer discard the results.
    if isinstance(result, list):
        documents = result
    elif isinstance(result, dict):
        documents = result.get("data") or []
    else:
        documents = getattr(result, "data", None) or []

    pages: List[str] = []
    for doc in documents:
        if isinstance(doc, str):
            text = doc
        elif isinstance(doc, dict):
            text = doc.get("markdown") or doc.get("text")
        else:
            text = getattr(doc, "markdown", None) or getattr(doc, "text", None)
        # Fall back to the document's string form rather than dropping it.
        pages.append(text if isinstance(text, str) else str(doc))

    logger.info("Firecrawl returned %d pages", len(pages))
    return pages
141+
142+
143+
async def build_livekit_tools(server: MCPServerStdio) -> List[Callable]:
    """
    Build LiveKit tools from a Supabase MCP server.

    Every tool reported by ``server.list_tools()`` (except
    ``deploy_edge_function``, which is skipped) is wrapped in an async proxy
    whose signature, annotations, name and docstring are synthesised from the
    tool's JSON schema, then registered through ``function_tool``.

    Args:
        server: A started MCP server used both to list and to call tools.

    Returns:
        The list of ``function_tool``-wrapped proxies.
    """
    tools: List[Callable] = []
    all_tools = await server.list_tools()
    logger.info("Found %d MCP tools", len(all_tools))

    for td in all_tools:
        if td.name == "deploy_edge_function":
            logger.warning("Skipping tool %s", td.name)
            continue

        # Work on a copy so the tool definition's own schema is never mutated.
        schema = copy.deepcopy(td.parameters_json_schema)
        if td.name == "list_tables":
            # Make ``schemas`` an optional, nullable string array with an
            # empty default.
            props = schema.setdefault("properties", {})
            props["schemas"] = {
                "type": ["array", "null"],
                "items": {"type": "string"},
                "default": [],
            }
            schema["required"] = [r for r in schema.get("required", []) if r != "schemas"]

        props = schema.get("properties", {})
        required = set(schema.get("required", []))

        # Defaults bind the current iteration's values so each proxy keeps
        # its own tool definition and schema.
        def make_proxy(
            tool_def=td,
            _props=props,
            _required=required,
            _schema=schema,
        ) -> Callable:
            async def proxy(context: RunContext, **kwargs):
                # Convert None -> [] for array params
                for k, v in list(kwargs.items()):
                    if v is not None:
                        continue
                    # .get() tolerates arguments absent from the schema.
                    declared = _props.get(k, {}).get("type")
                    if declared == "array" or (
                        isinstance(declared, list) and "array" in declared
                    ):
                        kwargs[k] = []

                response = await server.call_tool(tool_def.name, arguments=kwargs or None)
                if isinstance(response, list):
                    return response
                if hasattr(response, "content") and response.content:
                    # Prefer structured JSON; fall back to the raw text.
                    text = response.content[0].text
                    try:
                        return json.loads(text)
                    except json.JSONDecodeError:
                        return text
                return response

            # Build signature from schema
            params = [
                inspect.Parameter(
                    "context",
                    inspect.Parameter.POSITIONAL_OR_KEYWORD,
                    annotation=RunContext,
                )
            ]
            ann = {"context": RunContext}

            for name, ps in _props.items():
                # Required parameters get no default; optional ones default
                # to the schema's default, or None when it declares none.
                fallback = inspect.Parameter.empty if name in _required else None
                annotation = _py_type(ps)
                params.append(
                    inspect.Parameter(
                        name,
                        inspect.Parameter.KEYWORD_ONLY,
                        annotation=annotation,
                        default=ps.get("default", fallback),
                    )
                )
                ann[name] = annotation

            proxy.__signature__ = inspect.Signature(params)
            proxy.__annotations__ = ann
            proxy.__name__ = tool_def.name
            proxy.__doc__ = schema_to_google_docstring(tool_def.description or "", _schema)
            return function_tool(proxy)

        tools.append(make_proxy())

    return tools
221+
222+
223+
async def entrypoint(ctx: JobContext) -> None:
    """
    Main entrypoint for the LiveKit agent.

    Connects to the room, starts the Supabase MCP server, builds the agent's
    tool set (Firecrawl search plus the MCP-backed tools), starts the voice
    session and then idles until the task is cancelled.

    Args:
        ctx: LiveKit job context providing the connection and the room.
    """
    await ctx.connect()
    server = MCPServerStdio(
        "npx",
        args=["-y", "@supabase/mcp-server-supabase@latest", "--access-token", SUPABASE_TOKEN],
    )

    # ``async with`` guarantees the server is shut down and lets its exit
    # handler see the real exception instead of always receiving None.
    async with server:
        supabase_tools = await build_livekit_tools(server)
        tools = [firecrawl_search] + supabase_tools

        agent = Agent(
            instructions=(
                "You can either perform live web searches via `firecrawl_search` or "
                "database queries via Supabase MCP tools. "
                "Choose the appropriate tool based on whether the user needs fresh web data "
                "(news, external facts) or internal Supabase data."
            ),
            tools=tools,
        )

        session = AgentSession(
            vad=silero.VAD.load(min_silence_duration=0.1),
            stt=assemblyai.STT(word_boost=["Supabase"]),
            llm=openai.LLM(model="gpt-4o"),
            tts=openai.TTS(voice="ash"),
        )

        await session.start(agent=agent, room=ctx.room)
        await session.generate_reply(instructions="Hello! How can I assist you today?")

        # Keep the session (and therefore the MCP server) alive until cancelled
        try:
            while True:
                await asyncio.sleep(1)
        except asyncio.CancelledError:
            logger.info("Session cancelled, shutting down.")
267+
268+
269+
if __name__ == "__main__":
    # Hand the entrypoint to the LiveKit CLI runner when run as a script.
    worker_options = WorkerOptions(entrypoint_fnc=entrypoint)
    cli.run_app(worker_options)

0 commit comments

Comments
 (0)