Skip to content

Commit 23aea19

Browse files
authored
fix(genai): respect per-call generation parameters in ChatGoogleGenerativeAI (#1379)
1 parent 5062205 commit 23aea19

File tree

3 files changed

+106
-8
lines changed

3 files changed

+106
-8
lines changed

libs/genai/langchain_google_genai/chat_models.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2147,7 +2147,11 @@ def _prepare_params(
21472147
generation_config: dict[str, Any] | None = None,
21482148
**kwargs: Any,
21492149
) -> GenerationConfig:
2150-
if self.thinking_level is not None and self.thinking_budget is not None:
2150+
# Extract thinking parameters with kwargs override
2151+
thinking_budget = kwargs.get("thinking_budget", self.thinking_budget)
2152+
thinking_level = kwargs.get("thinking_level", self.thinking_level)
2153+
2154+
if thinking_level is not None and thinking_budget is not None:
21512155
msg = (
21522156
"Both 'thinking_level' and 'thinking_budget' were specified. "
21532157
"'thinking_level' is not yet supported by the current API version, "
@@ -2161,15 +2165,17 @@ def _prepare_params(
21612165
"candidate_count": self.n,
21622166
"temperature": self.temperature,
21632167
"stop_sequences": stop,
2164-
"max_output_tokens": self.max_output_tokens,
2168+
"max_output_tokens": kwargs.get(
2169+
"max_output_tokens", self.max_output_tokens
2170+
),
21652171
"top_k": self.top_k,
21662172
"top_p": self.top_p,
21672173
"response_modalities": self.response_modalities,
21682174
"thinking_config": (
21692175
(
21702176
(
2171-
{"thinking_budget": self.thinking_budget}
2172-
if self.thinking_budget is not None
2177+
{"thinking_budget": thinking_budget}
2178+
if thinking_budget is not None
21732179
else {}
21742180
)
21752181
| (
@@ -2178,14 +2184,14 @@ def _prepare_params(
21782184
else {}
21792185
)
21802186
| (
2181-
{"thinking_level": self.thinking_level}
2182-
if self.thinking_level is not None
2187+
{"thinking_level": thinking_level}
2188+
if thinking_level is not None
21832189
else {}
21842190
)
21852191
)
2186-
if self.thinking_budget is not None
2192+
if thinking_budget is not None
21872193
or self.include_thoughts is not None
2188-
or self.thinking_level is not None
2194+
or thinking_level is not None
21892195
else None
21902196
),
21912197
}.items()

libs/genai/tests/integration_tests/test_chat_models.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,3 +1331,56 @@ def test_code_execution_builtin(output_version: str) -> None:
13311331
}
13321332
response = llm.invoke([input_message, full, next_message])
13331333
_check_code_execution_output(response, output_version)
1334+
1335+
1336+
def test_chat_google_genai_invoke_with_generation_params() -> None:
1337+
"""Test that generation parameters passed to invoke() are respected.
1338+
1339+
Verifies that `max_output_tokens` (max_tokens) and `thinking_budget`
1340+
parameters passed directly to invoke() method override model defaults.
1341+
"""
1342+
llm = ChatGoogleGenerativeAI(model=_MODEL)
1343+
1344+
# Test with max_output_tokens constraint
1345+
result_constrained = llm.invoke(
1346+
"Alice, Bob, and Carol each live in a different house on the same street: "
1347+
"red, green, and blue. The person who lives in the red house owns a cat. "
1348+
"Bob does not live in the green house. Carol owns a dog. The green house "
1349+
"is to the left of the red house. Alice does not own a cat. Who lives in "
1350+
"each house, and what pet do they own?",
1351+
max_output_tokens=10,
1352+
thinking_budget=0,
1353+
)
1354+
1355+
assert isinstance(result_constrained, AIMessage)
1356+
# Verify output tokens are within limit
1357+
assert result_constrained.usage_metadata is not None
1358+
1359+
output_tokens = result_constrained.usage_metadata.get("output_tokens")
1360+
assert output_tokens is not None, "usage_metadata is missing 'output_tokens'"
1361+
assert output_tokens <= 10, f"Expected output_tokens <= 10, got {output_tokens}"
1362+
1363+
# Verify thinking is disabled
1364+
details = result_constrained.usage_metadata.get("output_token_details") or {}
1365+
assert "reasoning" not in details, (
1366+
"Expected no reasoning tokens when thinking_budget=0"
1367+
)
1368+
1369+
1370+
@pytest.mark.parametrize("max_tokens", [10, 20, 50])
1371+
def test_chat_google_genai_invoke_respects_max_tokens(max_tokens: int) -> None:
1372+
"""Test that different max_output_tokens values are respected."""
1373+
llm = ChatGoogleGenerativeAI(model=_MODEL)
1374+
1375+
result = llm.invoke(
1376+
"Write a detailed essay about artificial intelligence.",
1377+
max_output_tokens=max_tokens,
1378+
)
1379+
1380+
assert isinstance(result, AIMessage)
1381+
assert result.usage_metadata is not None
1382+
output_tokens = result.usage_metadata.get("output_tokens")
1383+
assert output_tokens is not None, "usage_metadata is missing 'output_tokens'"
1384+
assert output_tokens <= max_tokens, (
1385+
f"Expected output_tokens <= {max_tokens}, got {output_tokens}"
1386+
)

libs/genai/tests/unit_tests/test_chat_models.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3118,3 +3118,42 @@ def test_thinking_budget_alone_still_works() -> None:
31183118
assert config.thinking_config is not None
31193119
assert config.thinking_config.thinking_budget == 64
31203120
assert not hasattr(config.thinking_config, "thinking_level")
3121+
3122+
3123+
def test_kwargs_override_max_output_tokens() -> None:
3124+
"""Test that max_output_tokens can be overridden via kwargs."""
3125+
llm = ChatGoogleGenerativeAI(
3126+
model=MODEL_NAME,
3127+
google_api_key=SecretStr(FAKE_API_KEY),
3128+
max_output_tokens=100,
3129+
)
3130+
3131+
config = llm._prepare_params(stop=None, max_output_tokens=500)
3132+
assert config.max_output_tokens == 500
3133+
3134+
3135+
def test_kwargs_override_thinking_budget() -> None:
3136+
"""Test that thinking_budget can be overridden via kwargs."""
3137+
llm = ChatGoogleGenerativeAI(
3138+
model=MODEL_NAME,
3139+
google_api_key=SecretStr(FAKE_API_KEY),
3140+
thinking_budget=64,
3141+
)
3142+
3143+
config = llm._prepare_params(stop=None, thinking_budget=128)
3144+
assert config.thinking_config is not None
3145+
assert config.thinking_config.thinking_budget == 128
3146+
3147+
3148+
@pytest.mark.xfail(reason="Needs support in SDK.")
3149+
def test_kwargs_override_thinking_level() -> None:
3150+
"""Test that thinking_level can be overridden via kwargs."""
3151+
llm = ChatGoogleGenerativeAI(
3152+
model=MODEL_NAME,
3153+
google_api_key=SecretStr(FAKE_API_KEY),
3154+
thinking_level="low",
3155+
)
3156+
3157+
config = llm._prepare_params(stop=None, thinking_level="high")
3158+
assert config.thinking_config is not None
3159+
assert config.thinking_config.thinking_level == "high"

0 commit comments

Comments (0)