Skip to content

Commit 409df13

Browse files
Luis Pabon, copybara-github
authored and committed
feat: Granular Per Agent Speech Configuration
Merge #3170 Addresses Feature Request: #3116 This PR adds a `speech_config` to the **LLM Agent configuration** for the **live use case**. When an **asynchronous LLM** call is made to the **Gemini Live API**, it prioritizes the most specific agent configuration's speech_config. If that is null, it then uses the run configuration's speech_config. Unit tests have been added to verify this behavior. COPYBARA_INTEGRATE_REVIEW=#3170 from qyuo:bidi_agent_speech_config af1bd27 PiperOrigin-RevId: 822305427
1 parent 2a901d1 commit 409df13

File tree

4 files changed

+229
-3
lines changed

4 files changed

+229
-3
lines changed

contributing/samples/live_bidi_streaming_multi_agent/agent.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from google.adk.agents.llm_agent import Agent
1818
from google.adk.examples.example import Example
19+
from google.adk.models.google_llm import Gemini
1920
from google.adk.tools.example_tool import ExampleTool
2021
from google.genai import types
2122

@@ -28,6 +29,17 @@ def roll_die(sides: int) -> int:
2829

2930
roll_agent = Agent(
3031
name="roll_agent",
32+
model=Gemini(
33+
# model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
34+
model="gemini-live-2.5-flash-preview", # for AI studio key
35+
speech_config=types.SpeechConfig(
36+
voice_config=types.VoiceConfig(
37+
prebuilt_voice_config=types.PrebuiltVoiceConfig(
38+
voice_name="Kore",
39+
)
40+
)
41+
),
42+
),
3143
description="Handles rolling dice of different sizes.",
3244
instruction="""
3345
You are responsible for rolling dice based on the user's request.
@@ -69,6 +81,17 @@ def check_prime(nums: list[int]) -> str:
6981

7082
prime_agent = Agent(
7183
name="prime_agent",
84+
model=Gemini(
85+
# model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
86+
model="gemini-live-2.5-flash-preview", # for AI studio key
87+
speech_config=types.SpeechConfig(
88+
voice_config=types.VoiceConfig(
89+
prebuilt_voice_config=types.PrebuiltVoiceConfig(
90+
voice_name="Puck",
91+
)
92+
)
93+
),
94+
),
7295
description="Handles checking if numbers are prime.",
7396
instruction="""
7497
You are responsible for checking whether numbers are prime.
@@ -100,8 +123,17 @@ def get_current_weather(location: str):
100123

101124
root_agent = Agent(
102125
# find supported models here: https://google.github.io/adk-docs/get-started/streaming/quickstart-streaming/
103-
model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
104-
# model="gemini-live-2.5-flash-preview", # for AI studio key
126+
model=Gemini(
127+
# model="gemini-2.0-flash-live-preview-04-09", # for Vertex project
128+
model="gemini-live-2.5-flash-preview", # for AI studio key
129+
speech_config=types.SpeechConfig(
130+
voice_config=types.VoiceConfig(
131+
prebuilt_voice_config=types.PrebuiltVoiceConfig(
132+
voice_name="Zephyr",
133+
)
134+
)
135+
),
136+
),
105137
name="root_agent",
106138
instruction="""
107139
You are a helpful assistant that can check time, roll dice and check if numbers are prime.

src/google/adk/agents/run_config.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@ class StreamingMode(Enum):
3535

3636

3737
class RunConfig(BaseModel):
38-
"""Configs for runtime behavior of agents."""
38+
"""Configs for runtime behavior of agents.
39+
40+
The configs here will be overridden by agent-specific configurations.
41+
"""
3942

4043
model_config = ConfigDict(
4144
extra='forbid',

src/google/adk/models/google_llm.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ class Gemini(BaseLlm):
6060

6161
model: str = 'gemini-2.5-flash'
6262

63+
speech_config: Optional[types.SpeechConfig] = None
64+
6365
retry_options: Optional[types.HttpRetryOptions] = None
6466
"""Allow Gemini to retry failed responses.
6567
@@ -269,6 +271,9 @@ async def connect(self, llm_request: LlmRequest) -> BaseLlmConnection:
269271
self._live_api_version
270272
)
271273

274+
if self.speech_config is not None:
275+
llm_request.live_connect_config.speech_config = self.speech_config
276+
272277
llm_request.live_connect_config.system_instruction = types.Content(
273278
role='system',
274279
parts=[

tests/unittests/models/test_google_llm.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1858,3 +1858,189 @@ def mock_model_dump(*args, **kwargs):
18581858
# Should still succeed using repr()
18591859
assert "Config:" in log_output
18601860
assert "GenerateContentConfig" in log_output
1861+
1862+
1863+
@pytest.mark.asyncio
1864+
async def test_connect_uses_gemini_speech_config_when_request_is_none(
1865+
gemini_llm, llm_request
1866+
):
1867+
"""Tests that Gemini's speech_config is used when live_connect_config's is None."""
1868+
# Arrange: Set a speech_config on the Gemini instance with the voice "Kore"
1869+
gemini_llm.speech_config = types.SpeechConfig(
1870+
voice_config=types.VoiceConfig(
1871+
prebuilt_voice_config=types.PrebuiltVoiceConfig(
1872+
voice_name="Kore",
1873+
)
1874+
)
1875+
)
1876+
llm_request.live_connect_config = (
1877+
types.LiveConnectConfig()
1878+
) # speech_config is None
1879+
1880+
mock_live_session = mock.AsyncMock()
1881+
1882+
with mock.patch.object(gemini_llm, "_live_api_client") as mock_live_client:
1883+
1884+
class MockLiveConnect:
1885+
1886+
async def __aenter__(self):
1887+
return mock_live_session
1888+
1889+
async def __aexit__(self, *args):
1890+
pass
1891+
1892+
mock_live_client.aio.live.connect.return_value = MockLiveConnect()
1893+
1894+
# Act
1895+
async with gemini_llm.connect(llm_request) as connection:
1896+
# Assert
1897+
mock_live_client.aio.live.connect.assert_called_once()
1898+
call_args = mock_live_client.aio.live.connect.call_args
1899+
config_arg = call_args.kwargs["config"]
1900+
1901+
# Verify the speech_config from the Gemini instance was used
1902+
assert config_arg.speech_config is not None
1903+
assert (
1904+
config_arg.speech_config.voice_config.prebuilt_voice_config.voice_name
1905+
== "Kore"
1906+
)
1907+
assert isinstance(connection, GeminiLlmConnection)
1908+
1909+
1910+
@pytest.mark.asyncio
1911+
async def test_connect_uses_request_speech_config_when_gemini_is_none(
1912+
gemini_llm, llm_request
1913+
):
1914+
"""Tests that request's speech_config is used when Gemini's is None."""
1915+
# Arrange: Set a speech_config on the request instance with the voice "Kore"
1916+
gemini_llm.speech_config = None
1917+
request_speech_config = types.SpeechConfig(
1918+
voice_config=types.VoiceConfig(
1919+
prebuilt_voice_config=types.PrebuiltVoiceConfig(
1920+
voice_name="Kore",
1921+
)
1922+
)
1923+
)
1924+
llm_request.live_connect_config = types.LiveConnectConfig(
1925+
speech_config=request_speech_config
1926+
)
1927+
1928+
mock_live_session = mock.AsyncMock()
1929+
1930+
with mock.patch.object(gemini_llm, "_live_api_client") as mock_live_client:
1931+
1932+
class MockLiveConnect:
1933+
1934+
async def __aenter__(self):
1935+
return mock_live_session
1936+
1937+
async def __aexit__(self, *args):
1938+
pass
1939+
1940+
mock_live_client.aio.live.connect.return_value = MockLiveConnect()
1941+
1942+
# Act
1943+
async with gemini_llm.connect(llm_request) as connection:
1944+
# Assert
1945+
mock_live_client.aio.live.connect.assert_called_once()
1946+
call_args = mock_live_client.aio.live.connect.call_args
1947+
config_arg = call_args.kwargs["config"]
1948+
1949+
# Verify the speech_config from the request instance was used
1950+
assert config_arg.speech_config is not None
1951+
assert (
1952+
config_arg.speech_config.voice_config.prebuilt_voice_config.voice_name
1953+
== "Kore"
1954+
)
1955+
assert isinstance(connection, GeminiLlmConnection)
1956+
1957+
1958+
@pytest.mark.asyncio
1959+
async def test_connect_request_gemini_config_overrides_speech_config(
1960+
gemini_llm, llm_request
1961+
):
1962+
"""Tests that live_connect_config's speech_config is preserved even if Gemini has one."""
1963+
# Arrange: Set different speech_configs on both the Gemini instance ("Puck") and the request ("Zephyr")
1964+
gemini_llm.speech_config = types.SpeechConfig(
1965+
voice_config=types.VoiceConfig(
1966+
prebuilt_voice_config=types.PrebuiltVoiceConfig(
1967+
voice_name="Puck",
1968+
)
1969+
)
1970+
)
1971+
request_speech_config = types.SpeechConfig(
1972+
voice_config=types.VoiceConfig(
1973+
prebuilt_voice_config=types.PrebuiltVoiceConfig(
1974+
voice_name="Zephyr",
1975+
)
1976+
)
1977+
)
1978+
llm_request.live_connect_config = types.LiveConnectConfig(
1979+
speech_config=request_speech_config
1980+
)
1981+
1982+
mock_live_session = mock.AsyncMock()
1983+
1984+
with mock.patch.object(gemini_llm, "_live_api_client") as mock_live_client:
1985+
1986+
class MockLiveConnect:
1987+
1988+
async def __aenter__(self):
1989+
return mock_live_session
1990+
1991+
async def __aexit__(self, *args):
1992+
pass
1993+
1994+
mock_live_client.aio.live.connect.return_value = MockLiveConnect()
1995+
1996+
# Act
1997+
async with gemini_llm.connect(llm_request) as connection:
1998+
# Assert
1999+
mock_live_client.aio.live.connect.assert_called_once()
2000+
call_args = mock_live_client.aio.live.connect.call_args
2001+
config_arg = call_args.kwargs["config"]
2002+
2003+
# Verify the speech_config from the request ("Zephyr") was overwritten by Gemini's speech_config ("Puck")
2004+
assert config_arg.speech_config is not None
2005+
assert (
2006+
config_arg.speech_config.voice_config.prebuilt_voice_config.voice_name
2007+
== "Puck"
2008+
)
2009+
assert isinstance(connection, GeminiLlmConnection)
2010+
2011+
2012+
@pytest.mark.asyncio
2013+
async def test_connect_speech_config_remains_none_when_both_are_none(
2014+
gemini_llm, llm_request
2015+
):
2016+
"""Tests that speech_config is None when neither Gemini nor the request has it."""
2017+
# Arrange: Ensure both Gemini instance and request have no speech_config
2018+
gemini_llm.speech_config = None
2019+
llm_request.live_connect_config = (
2020+
types.LiveConnectConfig()
2021+
) # speech_config is None
2022+
2023+
mock_live_session = mock.AsyncMock()
2024+
2025+
with mock.patch.object(gemini_llm, "_live_api_client") as mock_live_client:
2026+
2027+
class MockLiveConnect:
2028+
2029+
async def __aenter__(self):
2030+
return mock_live_session
2031+
2032+
async def __aexit__(self, *args):
2033+
pass
2034+
2035+
mock_live_client.aio.live.connect.return_value = MockLiveConnect()
2036+
2037+
# Act
2038+
async with gemini_llm.connect(llm_request) as connection:
2039+
# Assert
2040+
mock_live_client.aio.live.connect.assert_called_once()
2041+
call_args = mock_live_client.aio.live.connect.call_args
2042+
config_arg = call_args.kwargs["config"]
2043+
2044+
# Verify the final speech_config is still None
2045+
assert config_arg.speech_config is None
2046+
assert isinstance(connection, GeminiLlmConnection)

0 commit comments

Comments
 (0)