Commit 384c802

Author: matdev83
Manual Merge PR #629. Add model URI params. Fix retry handling after failed file edits. Improved MCP argument parsing

1 parent e00f40a, commit 384c802

16 files changed: +1480 additions, -932 deletions

data/test_suite_state.json (1 addition, 1 deletion)

```diff
@@ -1,4 +1,4 @@
 {
-  "test_count": 5094,
+  "test_count": 5099,
   "last_updated": "1762168167.0802596"
 }
```

docs/zai-max-tokens-implementation.md (24 additions, 24 deletions)

````diff
@@ -2,33 +2,33 @@
 
 ## Overview
 
-Both ZAI connectors (`zai` and `zai-coding-plan`) now enforce a 128K (131,072 tokens) maximum output limit as specified by the ZAI API provider.
+Both ZAI connectors (`zai` and `zai-coding-plan`) now enforce a 200K (200,000 tokens) maximum output limit as specified by the ZAI API provider.
 
 ## Implementation Details
 
 ### Default Behavior
-- **Default max_tokens**: 131,072 (128K)
-  - This is the maximum supported by ZAI's backend models
-  - Used when client doesn't explicitly specify max_tokens or provides invalid values (None, 0, negative)
+- **Default max_tokens**: 200,000 (200K)
+  - This is the maximum supported by ZAI's backend models
+  - Used when client doesn't explicitly specify max_tokens or provides invalid values (None, 0, negative)
 
 ### Client Override Rules
 Clients can override the default by explicitly setting `max_tokens` in their request:
 
-1. **Valid Range**: 1,024 to 131,072 tokens
+1. **Valid Range**: 1,024 to 200,000 tokens
    - Values below 1K are clamped to 1,024
-   - Values above 128K are clamped to 131,072
+   - Values above 200K are clamped to 200,000
   - Values within range are preserved as-is
 
 2. **Invalid Values**: None, 0, or negative numbers
-   - Automatically use the 128K default
+   - Automatically use the 200K default
   - Ensures requests never fail due to missing/invalid max_tokens
 
 ### Code Locations
 
-#### ZaiCodingPlanBackend
-- File: `src/connectors/zai_coding_plan.py`
-- Method: `_prepare_payload()`
-- Inherits from: `OpenAIConnector`
+#### ZaiCodingPlanBackend
+- File: `src/connectors/zai_coding_plan.py`
+- Method: `_prepare_payload()`
+- Inherits from: `OpenAIConnector`
 
 #### ZAIConnector
 - File: `src/connectors/zai.py`
@@ -38,19 +38,19 @@ Clients can override the default by explicitly setting `max_tokens` in their req
 ## Examples
 
 ### Example 1: No max_tokens specified
-```python
-request = {
-    "model": "zai-coding-plan:glm-4.6",
-    "messages": [{"role": "user", "content": "Hello"}],
-    # max_tokens not specified
-}
-# Result: max_tokens = 131072 (128K)
-```
+```python
+request = {
+    "model": "zai-coding-plan:glm-4.6",
+    "messages": [{"role": "user", "content": "Hello"}],
+    # max_tokens not specified
+}
+# Result: max_tokens = 200000 (200K)
+```
 
 ### Example 2: Explicit valid value
 ```python
 request = {
-    "model": "zai-coding-plan:glm-4.6",
+    "model": "zai-coding-plan:glm-4.6",
     "messages": [{"role": "user", "content": "Hello"}],
     "max_tokens": 4096
 }
@@ -60,7 +60,7 @@ request = {
 ### Example 3: Value below minimum
 ```python
 request = {
-    "model": "zai-coding-plan:glm-4.6",
+    "model": "zai-coding-plan:glm-4.6",
     "messages": [{"role": "user", "content": "Hello"}],
     "max_tokens": 512
 }
@@ -70,11 +70,11 @@ request = {
 ### Example 4: Value above maximum
 ```python
 request = {
-    "model": "zai-coding-plan:glm-4.6",
+    "model": "zai-coding-plan:glm-4.6",
     "messages": [{"role": "user", "content": "Hello"}],
     "max_tokens": 200000
 }
-# Result: max_tokens = 131072 (clamped to maximum)
+# Result: max_tokens = 200000 (clamped to maximum)
 ```
 
 ## Testing
@@ -91,7 +91,7 @@ All tests pass successfully.
 ## Benefits
 
 1. **Prevents 422 Errors**: Ensures max_tokens is always valid
-2. **Maximizes Output**: Uses 128K by default for agentic coding tasks
+2. **Maximizes Output**: Uses 200K by default for agentic coding tasks
 3. **Client Control**: Allows explicit override within valid range
 4. **Robust**: Handles edge cases (None, 0, negative, out-of-range)
 5. **Consistent**: Same logic across both ZAI connectors
````

src/connectors/gemini.py (98 additions, 7 deletions)

```diff
@@ -13,6 +13,7 @@
 
 from src.connectors.base import LLMBackend
 from src.core.common.exceptions import (
+    AuthenticationError,
     BackendError,
     ServiceUnavailableError,
 )
@@ -415,7 +416,7 @@ async def chat_completions(  # type: ignore[override]
         effective_model: str,
         identity: IAppIdentityConfig | None = None,
         openrouter_api_base_url: str | None = None,
-        openrouter_headers_provider: Callable[[str, str], dict[str, str]] | None = None,
+        openrouter_headers_provider: Callable[[Any, str], dict[str, str]] | None = None,
         key_name: str | None = None,
         api_key: str | None = None,
         project: str | None = None,
@@ -425,7 +426,12 @@ async def chat_completions(  # type: ignore[override]
     ) -> ResponseEnvelope | StreamingResponseEnvelope:
         # Resolve base configuration
         base_api_url, headers = await self._resolve_gemini_api_config(
-            gemini_api_base_url, openrouter_api_base_url, api_key, **kwargs
+            gemini_api_base_url,
+            openrouter_api_base_url,
+            api_key,
+            openrouter_headers_provider=openrouter_headers_provider,
+            key_name=key_name,
+            **kwargs,
         )
         if identity:
             headers.update(identity.get_resolved_headers(None))
@@ -530,11 +536,31 @@ async def chat_completions(  # type: ignore[override]
             model_url, payload, headers, effective_model
         )
 
+    def _build_openrouter_header_context(self) -> dict[str, str]:
+        referer = "http://localhost:8000"
+        title = "InterceptorProxy"
+
+        identity = getattr(self.config, "identity", None)
+        if identity is not None:
+            referer = (
+                getattr(getattr(identity, "url", None), "default_value", referer)
+                or referer
+            )
+            title = (
+                getattr(getattr(identity, "title", None), "default_value", title)
+                or title
+            )
+
+        return {"app_site_url": referer, "app_x_title": title}
+
     async def _resolve_gemini_api_config(
         self,
         gemini_api_base_url: str | None,
         openrouter_api_base_url: str | None,
         api_key: str | None,
+        *,
+        openrouter_headers_provider: Callable[[Any, str], dict[str, str]] | None = None,
+        key_name: str | None = None,
         **kwargs: Any,
     ) -> tuple[str, dict[str, str]]:
         # Prefer explicit params, then kwargs, then instance attributes set during initialize
@@ -550,12 +576,77 @@ async def _resolve_gemini_api_config(
                 status_code=500,
                 detail="Gemini API base URL and API key must be provided.",
             )
-        key_name_to_use = (
-            kwargs.get("key_name")
-            or getattr(self, "key_name", None)
-            or "x-goog-api-key"
+        normalized_base = base.rstrip("/")
+
+        # Only use OpenRouter mode if the chosen base is actually OpenRouter.
+        # OpenRouter mode should only be enabled when the resolved base URL is different
+        # from the default Gemini API base URL, indicating we're actually routing to OpenRouter.
+        gemini_default_base = "https://generativelanguage.googleapis.com"
+        using_openrouter = (
+            openrouter_api_base_url is not None
+            and normalized_base != gemini_default_base.rstrip("/")
         )
-        return base.rstrip("/"), ensure_loop_guard_header({key_name_to_use: key})
+
+        headers: dict[str, str]
+        if using_openrouter:
+            headers = {}
+            provided_headers: dict[str, str] | None = None
+
+            if openrouter_headers_provider is not None:
+                errors: list[Exception] = []
+
+                if key_name is not None:
+                    try:
+                        candidate = openrouter_headers_provider(key_name, key)
+                    except (AttributeError, TypeError) as exc:
+                        errors.append(exc)
+                    else:
+                        if candidate:
+                            provided_headers = dict(candidate)
+
+                if provided_headers is None:
+                    context = self._build_openrouter_header_context()
+                    try:
+                        candidate = openrouter_headers_provider(context, key)
+                    except Exception as exc:  # pragma: no cover - defensive guard
+                        if errors and logger.isEnabledFor(logging.DEBUG):
+                            logger.debug(
+                                "OpenRouter headers provider rejected key_name input: %s",
+                                errors[-1],
+                                exc_info=True,
+                            )
+                        raise AuthenticationError(
+                            message="OpenRouter headers provider failed to produce headers.",
+                            code="missing_credentials",
+                        ) from exc
+                    else:
+                        provided_headers = dict(candidate)
+
+            if provided_headers is None:
+                context = self._build_openrouter_header_context()
+                provided_headers = {
+                    "Authorization": f"Bearer {key}",
+                    "Content-Type": "application/json",
+                    "HTTP-Referer": context["app_site_url"],
+                    "X-Title": context["app_x_title"],
+                }
+
+            headers.update(provided_headers)
+            context = self._build_openrouter_header_context()
+            headers.setdefault("Authorization", f"Bearer {key}")
+            headers.setdefault("Content-Type", "application/json")
+            headers.setdefault("HTTP-Referer", context["app_site_url"])
+            headers.setdefault("X-Title", context["app_x_title"])
+        else:
+            key_name_to_use = (
+                key_name
+                or kwargs.get("key_name")
+                or getattr(self, "key_name", None)
+                or "x-goog-api-key"
+            )
+            headers = {key_name_to_use: key}
+
+        return normalized_base, ensure_loop_guard_header(headers)
 
     def _apply_generation_config(
         self, payload: dict[str, Any], request_data: ChatRequest
```

src/connectors/qwen_oauth.py (38 additions, 0 deletions)

```diff
@@ -238,6 +238,44 @@ def _launch_cli_refresh_process(self) -> None:
                 exc_info=True,
             )
 
+    async def _prepare_payload(
+        self,
+        request_data: Any,
+        processed_messages: list[Any],
+        effective_model: str,
+    ) -> dict[str, Any]:
+        """Ensure sampling parameters are forwarded to the Qwen API payload."""
+
+        payload = await super()._prepare_payload(
+            request_data, processed_messages, effective_model
+        )
+
+        def _extract_param(name: str) -> Any | None:
+            value = getattr(request_data, name, None)
+            if value is None and isinstance(request_data, dict):
+                value = request_data.get(name)
+            if value is None:
+                extra_body = getattr(request_data, "extra_body", None)
+                if isinstance(extra_body, dict):
+                    value = extra_body.get(name)
+            return value
+
+        top_p = _extract_param("top_p")
+        if top_p is not None:
+            try:
+                payload["top_p"] = float(top_p)
+            except (TypeError, ValueError):
+                logger.debug("Ignoring non-numeric top_p value: %r", top_p)
+
+        top_k = _extract_param("top_k")
+        if top_k is not None:
+            try:
+                payload["top_k"] = int(top_k)
+            except (TypeError, ValueError):
+                logger.debug("Ignoring non-integer top_k value: %r", top_k)
+
+        return payload
+
     async def _poll_for_new_token(self, max_wait_seconds: float | None = None) -> bool:
         """Poll the credential file for an updated token after CLI refresh."""
         if not self._is_token_expired():
```

src/connectors/zai.py (34 additions, 6 deletions)

```diff
@@ -2,6 +2,7 @@
 ZAI connector for Zhipu AI's GLM models
 """
 
+import logging
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
@@ -21,6 +22,9 @@
 from src.core.services.translation_service import TranslationService
 
 
+logger = logging.getLogger(__name__)
+
+
 class ZAIConnector(OpenAIConnector):
     """ZAI backend connector for Zhipu AI's GLM models."""
 
@@ -134,35 +138,59 @@ async def _prepare_payload(
         effective_model: str,
     ) -> dict[str, Any]:
         """
-        Prepare payload for ZAI backend with 128K max_tokens support.
+        Prepare payload for ZAI backend with 200K max_tokens support.
 
-        ZAI backend supports up to 128K output tokens. This method ensures
+        ZAI backend supports up to 200K output tokens. This method ensures
         max_tokens is set appropriately based on client request.
         """
         payload = await super()._prepare_payload(
             request_data, processed_messages, effective_model
         )
 
+        def _extract_param(name: str) -> Any | None:
+            value = getattr(request_data, name, None)
+            if value is None and isinstance(request_data, dict):
+                value = request_data.get(name)
+            if value is None:
+                extra_body = getattr(request_data, "extra_body", None)
+                if isinstance(extra_body, dict):
+                    value = extra_body.get(name)
+            return value
+
+        top_p = _extract_param("top_p")
+        if top_p is not None:
+            try:
+                payload["top_p"] = float(top_p)
+            except (TypeError, ValueError):
+                logger.debug("Ignoring non-numeric top_p value for ZAI: %r", top_p)
+
+        top_k = _extract_param("top_k")
+        if top_k is not None:
+            try:
+                payload["top_k"] = int(top_k)
+            except (TypeError, ValueError):
+                logger.debug("Ignoring non-integer top_k value for ZAI: %r", top_k)
+
         # ZAI currently breaks tool calling when reasoning is enabled. The upstream
         # service does not support the OpenAI reasoning payload, so strip any
         # client-specified reasoning configuration before sending the request.
         payload.pop("reasoning", None)
         payload.pop("reasoning_effort", None)
 
-        # ZAI backend supports up to 128K output tokens
+        # ZAI backend supports up to 200K output tokens
         # Override max_tokens only if client explicitly set a valid positive value
         requested_max_tokens = getattr(request_data, "max_tokens", None)
 
         if requested_max_tokens is not None and requested_max_tokens > 0:
             # Client explicitly requested a value - validate and clamp to valid range
             # Only enforce maximum limit, allow any positive value as minimum
-            if requested_max_tokens > 131072:  # 128K
-                payload["max_tokens"] = 131072
+            if requested_max_tokens > 200000:  # 200K
+                payload["max_tokens"] = 200000
             else:
                 payload["max_tokens"] = requested_max_tokens
         else:
             # No explicit request or invalid value (None, 0, negative) - use ZAI's max
-            payload["max_tokens"] = 131072  # 128K default for ZAI
+            payload["max_tokens"] = 200000  # 200K default for ZAI
 
         return payload
```