From 114075654c74a1f3a0038195e5f886b701d8cb96 Mon Sep 17 00:00:00 2001
From: openhands
Date: Mon, 10 Nov 2025 14:33:17 +0000
Subject: [PATCH 1/2] Handle InternalServerError with choices=None to enable
 temperature bumping

Some LLM providers occasionally return malformed responses with
choices=None. When this happens, litellm raises InternalServerError
during internal validation in convert_to_model_response_object before
returning the response to our code.

Previously, these InternalServerErrors were retried, but without
temperature bumping. Since temperature=0.0 is deterministic, all retries
would hit the same provider bug and fail.

This change detects InternalServerErrors from
convert_to_model_response_object and converts them to LLMNoResponseError.
This enables the RetryMixin's adaptive behavior, which bumps temperature
from 0.0 to 1.0 on retry. The temperature change makes the provider's
response generation non-deterministic, which can help avoid the same bug
on subsequent attempts.

The detection checks for 'convert_to_model_response_object' in the error
message, which is the specific function where litellm validates the
response_object['choices'] field.

This complements PR #1107, which handles the case where litellm
successfully returns a response with empty choices. Together, these
changes ensure temperature bumping happens regardless of where the
choices validation fails.

Co-authored-by: openhands
---
 openhands-sdk/openhands/sdk/llm/llm.py        |  17 ++-
 .../llm/test_api_connection_error_retry.py    | 113 +++++++++++++++++-
 2 files changed, 127 insertions(+), 3 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py
index ff6afd4299..1e00d579d1 100644
--- a/openhands-sdk/openhands/sdk/llm/llm.py
+++ b/openhands-sdk/openhands/sdk/llm/llm.py
@@ -518,7 +518,22 @@ def _one_attempt(**retry_kwargs) -> ModelResponse:
             self._telemetry.on_request(log_ctx=log_ctx)
             # Merge retry-modified kwargs (like temperature) with call_kwargs
             final_kwargs = {**call_kwargs, **retry_kwargs}
-            resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+            try:
+                resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+            except InternalServerError as e:
+                # litellm sometimes raises InternalServerError when it receives
+                # a malformed response from the provider (e.g., choices=None).
+                # This happens in convert_to_model_response_object when
+                # validating response_object["choices"]. We convert this specific
+                # error to LLMNoResponseError so that temperature bumping is
+                # triggered on retry, which can help avoid the same provider bug.
+                error_msg = str(e).lower()
+                if "convert_to_model_response_object" in error_msg:
+                    raise LLMNoResponseError(
+                        f"Provider returned malformed response: {e}"
+                    ) from e
+                raise
+
             raw_resp: ModelResponse | None = None
             if use_mock_tools:
                 raw_resp = copy.deepcopy(resp)
diff --git a/tests/sdk/llm/test_api_connection_error_retry.py b/tests/sdk/llm/test_api_connection_error_retry.py
index 315490d18f..c3e1bc3ce0 100644
--- a/tests/sdk/llm/test_api_connection_error_retry.py
+++ b/tests/sdk/llm/test_api_connection_error_retry.py
@@ -1,12 +1,12 @@
 from unittest.mock import patch
 
 import pytest
-from litellm.exceptions import APIConnectionError
+from litellm.exceptions import APIConnectionError, InternalServerError
 from litellm.types.utils import Choices, Message as LiteLLMMessage, ModelResponse, Usage
 from pydantic import SecretStr
 
 from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent
-from openhands.sdk.llm.exceptions import LLMServiceUnavailableError
+from openhands.sdk.llm.exceptions import LLMNoResponseError, LLMServiceUnavailableError
 
 
 def create_mock_response(content: str = "Test response", response_id: str = "test-id"):
@@ -255,3 +255,112 @@ def retry_listener(attempt: int, max_attempts: int):
         assert isinstance(max_attempts, int)
         assert attempt >= 1
         assert max_attempts == default_config.num_retries
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_retries_with_temperature_bump(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that InternalServerError from convert_to_model_response_object
+    is converted to LLMNoResponseError and retried with temperature bump.
+    """
+    # Ensure we start at 0.0 to trigger bump to 1.0 on retry
+    assert default_config.temperature == 0.0
+
+    mock_litellm_completion.side_effect = [
+        InternalServerError(
+            message=(
+                "Invalid response object Traceback (most recent call last):\n"
+                '  File "litellm/litellm_core_utils/llm_response_utils/'
+                'convert_dict_to_response.py", line 466, in '
+                "convert_to_model_response_object\n"
+                '    assert response_object["choices"] is not None\n'
+                "AssertionError"
+            ),
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        create_mock_response("success"),
+    ]
+
+    response = default_config.completion(
+        messages=[Message(role="user", content=[TextContent(text="hi")])]
+    )
+
+    assert isinstance(response, LLMResponse)
+    assert response.message is not None
+    assert mock_litellm_completion.call_count == 2
+
+    # Verify that on the second call, temperature was bumped to 1.0
+    _, second_kwargs = mock_litellm_completion.call_args_list[1]
+    assert second_kwargs.get("temperature") == 1.0
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_exhausts_retries(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that when all retries fail with InternalServerError from
+    convert_to_model_response_object, LLMNoResponseError is raised.
+ """ + mock_litellm_completion.side_effect = [ + InternalServerError( + message=( + "File convert_to_model_response_object: " + "assert response_object['choices'] is not None" + ), + llm_provider="test_provider", + model="test_model", + ), + InternalServerError( + message=( + "File convert_to_model_response_object: " + "assert response_object['choices'] is not None" + ), + llm_provider="test_provider", + model="test_model", + ), + ] + + with pytest.raises(LLMNoResponseError) as excinfo: + default_config.completion( + messages=[Message(role="user", content=[TextContent(text="hi")])] + ) + + assert mock_litellm_completion.call_count == default_config.num_retries + assert "malformed response" in str(excinfo.value).lower() + + +@patch("openhands.sdk.llm.llm.litellm_completion") +def test_internal_server_error_unrelated_not_converted( + mock_litellm_completion, default_config +): + """ + Test that unrelated InternalServerError (not about choices) is NOT + converted to LLMNoResponseError. + """ + mock_litellm_completion.side_effect = [ + InternalServerError( + message="Database connection failed", + llm_provider="test_provider", + model="test_model", + ), + InternalServerError( + message="Database connection failed", + llm_provider="test_provider", + model="test_model", + ), + ] + + # Should raise InternalServerError (mapped to LLMServiceUnavailableError), + # not LLMNoResponseError + with pytest.raises(Exception) as excinfo: + default_config.completion( + messages=[Message(role="user", content=[TextContent(text="hi")])] + ) + + # Should NOT be LLMNoResponseError + assert not isinstance(excinfo.value, LLMNoResponseError) + assert mock_litellm_completion.call_count == default_config.num_retries From a71b56cc57f9aa2200c494a5fbdcb264e454743f Mon Sep 17 00:00:00 2001 From: enyst Date: Mon, 10 Nov 2025 17:01:04 +0000 Subject: [PATCH 2/2] llm(retry): move InternalServerError(choices=None) handling into RetryMixin - Convert specific InternalServerError from LiteLLM response conversion to LLMNoResponseError in retry wrapper - Bump temperature for this case in before_sleep just like LLMNoResponseError - Remove ad-hoc try/except from LLM.completion path so logic is centralized This ensures the retry path handles these errors consistently and enables temperature bumping. Co-authored-by: openhands --- openhands-sdk/openhands/sdk/llm/llm.py | 16 +------- .../openhands/sdk/llm/utils/retry_mixin.py | 41 +++++++++++++++---- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py index e4e6cea958..3fefdaab99 100644 --- a/openhands-sdk/openhands/sdk/llm/llm.py +++ b/openhands-sdk/openhands/sdk/llm/llm.py @@ -521,21 +521,7 @@ def _one_attempt(**retry_kwargs) -> ModelResponse: self._telemetry.on_request(log_ctx=log_ctx) # Merge retry-modified kwargs (like temperature) with call_kwargs final_kwargs = {**call_kwargs, **retry_kwargs} - try: - resp = self._transport_call(messages=formatted_messages, **final_kwargs) - except InternalServerError as e: - # litellm sometimes raises InternalServerError when it receives - # a malformed response from the provider (e.g., choices=None). - # This happens in convert_to_model_response_object when - # validating response_object["choices"]. We convert this specific - # error to LLMNoResponseError so that temperature bumping is - # triggered on retry, which can help avoid the same provider bug. 
-                error_msg = str(e).lower()
-                if "convert_to_model_response_object" in error_msg:
-                    raise LLMNoResponseError(
-                        f"Provider returned malformed response: {e}"
-                    ) from e
-                raise
+            resp = self._transport_call(messages=formatted_messages, **final_kwargs)
 
             raw_resp: ModelResponse | None = None
             if use_mock_tools:
diff --git a/openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py b/openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py
index 11b025d283..d0a2fffd10 100644
--- a/openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py
+++ b/openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py
@@ -1,6 +1,7 @@
 from collections.abc import Callable, Iterable
 from typing import Any, cast
 
+from litellm.exceptions import InternalServerError
 from tenacity import (
     RetryCallState,
     retry,
@@ -19,6 +20,14 @@
 RetryListener = Callable[[int, int], None]
 
 
+def _looks_like_choices_none_error(msg: str) -> bool:
+    m = msg.lower()
+    if "choices" not in m:
+        return False
+    # Heuristics matching LiteLLM response conversion assertions/validation
+    return any(k in m for k in ("none", "assert", "invalid"))
+
+
 class RetryMixin:
     """Mixin class for retry logic."""
 
@@ -50,24 +59,28 @@ def before_sleep(retry_state: RetryCallState) -> None:
             if exc is None:
                 return
 
-            # Only adjust temperature for LLMNoResponseError
-            if isinstance(exc, LLMNoResponseError):
+            # Adjust temperature for LLMNoResponseError or certain InternalServerError
+            should_bump = isinstance(exc, LLMNoResponseError) or (
+                isinstance(exc, InternalServerError)
+                and _looks_like_choices_none_error(str(exc))
+            )
+            if should_bump:
                 kwargs = getattr(retry_state, "kwargs", None)
                 if isinstance(kwargs, dict):
                     current_temp = kwargs.get("temperature", 0)
                     if current_temp == 0:
                         kwargs["temperature"] = 1.0
                         logger.warning(
-                            "LLMNoResponseError with temperature=0, "
+                            "LLMNoResponse-like error with temperature=0, "
                             "setting temperature to 1.0 for next attempt."
                         )
                     else:
                         logger.warning(
-                            f"LLMNoResponseError with temperature={current_temp}, "
-                            "keeping original temperature"
+                            "LLMNoResponse-like error with temperature="
+                            f"{current_temp}, keeping original temperature"
                         )
 
-        retry_decorator: Callable[[Callable[..., Any]], Callable[..., Any]] = retry(
+        tenacity_decorator: Callable[[Callable[..., Any]], Callable[..., Any]] = retry(
             before_sleep=before_sleep,
             stop=stop_after_attempt(num_retries),
             reraise=True,
@@ -78,7 +91,21 @@ def before_sleep(retry_state: RetryCallState) -> None:
                 max=retry_max_wait,
             ),
         )
-        return retry_decorator
+
+        def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+            def wrapped(*args: Any, **kwargs: Any) -> Any:
+                try:
+                    return func(*args, **kwargs)
+                except InternalServerError as e:
+                    if _looks_like_choices_none_error(str(e)):
+                        raise LLMNoResponseError(
+                            f"Provider returned malformed response: {e}"
+                        ) from e
+                    raise
+
+            return tenacity_decorator(wrapped)
+
+        return decorator
 
     def log_retry_attempt(self, retry_state: RetryCallState) -> None:
         """Log retry attempts."""
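Note for reviewers: the end-to-end behavior introduced in PATCH 2/2 (convert the
choices=None InternalServerError into LLMNoResponseError inside the wrapper, then
bump temperature 0.0 -> 1.0 in before_sleep) can be demonstrated outside the SDK.
Below is a minimal, self-contained sketch, not SDK code: `InternalServerError` and
`LLMNoResponseError` are local stand-ins (the real litellm exception takes more
arguments), and `retry_with_temperature_bump`, `flaky_completion`, and `calls` are
illustrative names. It relies on the same tenacity detail the mixin does:
`retry_state.kwargs` is the very dict tenacity reuses to re-invoke the function,
so mutating it in `before_sleep` changes the next attempt's arguments.

```python
from collections.abc import Callable
from typing import Any

from tenacity import RetryCallState, retry, stop_after_attempt, wait_fixed


class InternalServerError(Exception):
    """Local stand-in for litellm.exceptions.InternalServerError."""


class LLMNoResponseError(Exception):
    """Local stand-in for openhands.sdk.llm.exceptions.LLMNoResponseError."""


def _looks_like_choices_none_error(msg: str) -> bool:
    # Same heuristic as the patch: "choices" plus a marker that LiteLLM's
    # response conversion emits when it trips over choices=None.
    m = msg.lower()
    if "choices" not in m:
        return False
    return any(k in m for k in ("none", "assert", "invalid"))


def retry_with_temperature_bump(num_retries: int):
    """Illustrative equivalent of RetryMixin.retry_decorator."""

    def before_sleep(retry_state: RetryCallState) -> None:
        exc = retry_state.outcome.exception() if retry_state.outcome else None
        should_bump = isinstance(exc, LLMNoResponseError) or (
            isinstance(exc, InternalServerError)
            and _looks_like_choices_none_error(str(exc))
        )
        # tenacity re-invokes the function with this same kwargs dict,
        # so mutating it here changes the next attempt's arguments.
        if should_bump and retry_state.kwargs.get("temperature", 0) == 0:
            retry_state.kwargs["temperature"] = 1.0

    tenacity_decorator = retry(
        before_sleep=before_sleep,
        stop=stop_after_attempt(num_retries),
        reraise=True,
        wait=wait_fixed(0),  # no backoff, keeps the demo instant
    )

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        def wrapped(*args: Any, **kwargs: Any) -> Any:
            try:
                return func(*args, **kwargs)
            except InternalServerError as e:
                # The conversion from PATCH 2/2: make the malformed-response
                # case look like LLMNoResponseError so retries adapt.
                if _looks_like_choices_none_error(str(e)):
                    raise LLMNoResponseError(
                        f"Provider returned malformed response: {e}"
                    ) from e
                raise

        return tenacity_decorator(wrapped)

    return decorator


calls: list[float] = []


@retry_with_temperature_bump(num_retries=2)
def flaky_completion(*, temperature: float = 0.0) -> str:
    calls.append(temperature)
    if len(calls) == 1:
        # First attempt: simulate LiteLLM failing to convert choices=None.
        raise InternalServerError(
            "convert_to_model_response_object: "
            "assert response_object['choices'] is not None"
        )
    return "ok"


assert flaky_completion() == "ok"
assert calls == [0.0, 1.0]  # second attempt ran with the bumped temperature
```

The important property is that the conversion happens inside the wrapper, before
tenacity classifies the exception, so before_sleep needs only one
LLMNoResponseError-shaped branch plus the defensive InternalServerError check.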
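If the heuristic's keyword list ever needs tuning, a table-driven test pins the
matching behavior down. A sketch, assuming the private helper stays importable
from the module the patch adds it to:

```python
import pytest

# Import path taken from the patch above; the helper is private, so this
# coupling is a deliberate trade-off for test coverage.
from openhands.sdk.llm.utils.retry_mixin import _looks_like_choices_none_error


@pytest.mark.parametrize(
    ("message", "expected"),
    [
        ("assert response_object['choices'] is not None", True),
        ("Invalid response object: choices missing", True),
        ("choices came back fine", False),  # mentions choices, no failure marker
        ("Database connection failed", False),  # unrelated 5xx
    ],
)
def test_looks_like_choices_none_error(message: str, expected: bool) -> None:
    assert _looks_like_choices_none_error(message) is expected
```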