Skip to content

Commit 23aea19

Browse files
authored
fix(genai): respect per-call generation parameters in ChatGoogleGenerativeAI (#1379)
1 parent 5062205 commit 23aea19

File tree

3 files changed

+106
-8
lines changed

3 files changed

+106
-8
lines changed

libs/genai/langchain_google_genai/chat_models.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2147,7 +2147,11 @@ def _prepare_params(
21472147
generation_config: dict[str, Any] | None = None,
21482148
**kwargs: Any,
21492149
) -> GenerationConfig:
2150-
if self.thinking_level is not None and self.thinking_budget is not None:
2150+
# Extract thinking parameters with kwargs override
2151+
thinking_budget = kwargs.get("thinking_budget", self.thinking_budget)
2152+
thinking_level = kwargs.get("thinking_level", self.thinking_level)
2153+
2154+
if thinking_level is not None and thinking_budget is not None:
21512155
msg = (
21522156
"Both 'thinking_level' and 'thinking_budget' were specified. "
21532157
"'thinking_level' is not yet supported by the current API version, "
@@ -2161,15 +2165,17 @@ def _prepare_params(
21612165
"candidate_count": self.n,
21622166
"temperature": self.temperature,
21632167
"stop_sequences": stop,
2164-
"max_output_tokens": self.max_output_tokens,
2168+
"max_output_tokens": kwargs.get(
2169+
"max_output_tokens", self.max_output_tokens
2170+
),
21652171
"top_k": self.top_k,
21662172
"top_p": self.top_p,
21672173
"response_modalities": self.response_modalities,
21682174
"thinking_config": (
21692175
(
21702176
(
2171-
{"thinking_budget": self.thinking_budget}
2172-
if self.thinking_budget is not None
2177+
{"thinking_budget": thinking_budget}
2178+
if thinking_budget is not None
21732179
else {}
21742180
)
21752181
| (
@@ -2178,14 +2184,14 @@ def _prepare_params(
21782184
else {}
21792185
)
21802186
| (
2181-
{"thinking_level": self.thinking_level}
2182-
if self.thinking_level is not None
2187+
{"thinking_level": thinking_level}
2188+
if thinking_level is not None
21832189
else {}
21842190
)
21852191
)
2186-
if self.thinking_budget is not None
2192+
if thinking_budget is not None
21872193
or self.include_thoughts is not None
2188-
or self.thinking_level is not None
2194+
or thinking_level is not None
21892195
else None
21902196
),
21912197
}.items()

libs/genai/tests/integration_tests/test_chat_models.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,3 +1331,56 @@ def test_code_execution_builtin(output_version: str) -> None:
13311331
}
13321332
response = llm.invoke([input_message, full, next_message])
13331333
_check_code_execution_output(response, output_version)
1334+
1335+
1336+
def test_chat_google_genai_invoke_with_generation_params() -> None:
1337+
"""Test that generation parameters passed to invoke() are respected.
1338+
1339+
Verifies that `max_output_tokens` (max_tokens) and `thinking_budget`
1340+
parameters passed directly to invoke() method override model defaults.
1341+
"""
1342+
llm = ChatGoogleGenerativeAI(model=_MODEL)
1343+
1344+
# Test with max_output_tokens constraint
1345+
result_constrained = llm.invoke(
1346+
"Alice, Bob, and Carol each live in a different house on the same street: "
1347+
"red, green, and blue. The person who lives in the red house owns a cat. "
1348+
"Bob does not live in the green house. Carol owns a dog. The green house "
1349+
"is to the left of the red house. Alice does not own a cat. Who lives in "
1350+
"each house, and what pet do they own?",
1351+
max_output_tokens=10,
1352+
thinking_budget=0,
1353+
)
1354+
1355+
assert isinstance(result_constrained, AIMessage)
1356+
# Verify output tokens are within limit
1357+
assert result_constrained.usage_metadata is not None
1358+
1359+
output_tokens = result_constrained.usage_metadata.get("output_tokens")
1360+
assert output_tokens is not None, "usage_metadata is missing 'output_tokens'"
1361+
assert output_tokens <= 10, f"Expected output_tokens <= 10, got {output_tokens}"
1362+
1363+
# Verify thinking is disabled
1364+
details = result_constrained.usage_metadata.get("output_token_details") or {}
1365+
assert "reasoning" not in details, (
1366+
"Expected no reasoning tokens when thinking_budget=0"
1367+
)
1368+
1369+
1370+
@pytest.mark.parametrize("max_tokens", [10, 20, 50])
1371+
def test_chat_google_genai_invoke_respects_max_tokens(max_tokens: int) -> None:
1372+
"""Test that different max_output_tokens values are respected."""
1373+
llm = ChatGoogleGenerativeAI(model=_MODEL)
1374+
1375+
result = llm.invoke(
1376+
"Write a detailed essay about artificial intelligence.",
1377+
max_output_tokens=max_tokens,
1378+
)
1379+
1380+
assert isinstance(result, AIMessage)
1381+
assert result.usage_metadata is not None
1382+
output_tokens = result.usage_metadata.get("output_tokens")
1383+
assert output_tokens is not None, "usage_metadata is missing 'output_tokens'"
1384+
assert output_tokens <= max_tokens, (
1385+
f"Expected output_tokens <= {max_tokens}, got {output_tokens}"
1386+
)

libs/genai/tests/unit_tests/test_chat_models.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3118,3 +3118,42 @@ def test_thinking_budget_alone_still_works() -> None:
31183118
assert config.thinking_config is not None
31193119
assert config.thinking_config.thinking_budget == 64
31203120
assert not hasattr(config.thinking_config, "thinking_level")
3121+
3122+
3123+
def test_kwargs_override_max_output_tokens() -> None:
3124+
"""Test that max_output_tokens can be overridden via kwargs."""
3125+
llm = ChatGoogleGenerativeAI(
3126+
model=MODEL_NAME,
3127+
google_api_key=SecretStr(FAKE_API_KEY),
3128+
max_output_tokens=100,
3129+
)
3130+
3131+
config = llm._prepare_params(stop=None, max_output_tokens=500)
3132+
assert config.max_output_tokens == 500
3133+
3134+
3135+
def test_kwargs_override_thinking_budget() -> None:
3136+
"""Test that thinking_budget can be overridden via kwargs."""
3137+
llm = ChatGoogleGenerativeAI(
3138+
model=MODEL_NAME,
3139+
google_api_key=SecretStr(FAKE_API_KEY),
3140+
thinking_budget=64,
3141+
)
3142+
3143+
config = llm._prepare_params(stop=None, thinking_budget=128)
3144+
assert config.thinking_config is not None
3145+
assert config.thinking_config.thinking_budget == 128
3146+
3147+
3148+
@pytest.mark.xfail(reason="Needs support in SDK.")
3149+
def test_kwargs_override_thinking_level() -> None:
3150+
"""Test that thinking_level can be overridden via kwargs."""
3151+
llm = ChatGoogleGenerativeAI(
3152+
model=MODEL_NAME,
3153+
google_api_key=SecretStr(FAKE_API_KEY),
3154+
thinking_level="low",
3155+
)
3156+
3157+
config = llm._prepare_params(stop=None, thinking_level="high")
3158+
assert config.thinking_config is not None
3159+
assert config.thinking_config.thinking_level == "high"

0 commit comments

Comments (0)