Commit 1140756
Handle InternalServerError with choices=None to enable temperature bumping
Some LLM providers occasionally return malformed responses with choices=None. When this happens, litellm raises InternalServerError during internal validation in convert_to_model_response_object, before returning the response to our code. Previously, these InternalServerErrors were retried, but without temperature bumping. Since temperature=0.0 is deterministic, all retries would hit the same provider bug and fail.

This change detects InternalServerErrors from convert_to_model_response_object and converts them to LLMNoResponseError. This enables the RetryMixin's adaptive behavior, which bumps temperature from 0.0 to 1.0 on retry. The temperature change makes the provider's response generation non-deterministic, which can help avoid the same bug on subsequent attempts.

The detection checks for 'convert_to_model_response_object' in the error message, which is the specific function where litellm validates the response_object['choices'] field.

This complements PR #1107, which handles the case where litellm successfully returns a response with empty choices. Together, these changes ensure temperature bumping happens regardless of where the choices validation fails.

Co-authored-by: openhands <openhands@all-hands.dev>
1 parent 9652de4 commit 1140756
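The RetryMixin referenced above is not part of this diff. As a rough sketch of the adaptive behavior it enables (illustrative only: the function name, signature, and backoff below are assumptions, not the SDK's actual retry code):

```python
import time

from openhands.sdk.llm.exceptions import LLMNoResponseError


def completion_with_adaptive_retry(call, kwargs, num_retries=3, backoff=1.0):
    """Retry `call`, bumping temperature from 0.0 to 1.0 after the first
    LLMNoResponseError so that later attempts are non-deterministic."""
    for attempt in range(num_retries):
        try:
            return call(**kwargs)
        except LLMNoResponseError:
            if attempt + 1 == num_retries:
                raise  # retries exhausted; let the error surface
            if kwargs.get("temperature", 0.0) == 0.0:
                kwargs = {**kwargs, "temperature": 1.0}
            time.sleep(backoff * (attempt + 1))  # simple linear backoff
```

With this change, the malformed-choices path feeds exactly this kind of loop, because the error now surfaces as LLMNoResponseError instead of InternalServerError.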

File tree

openhands-sdk/openhands/sdk/llm/llm.py
tests/sdk/llm/test_api_connection_error_retry.py

2 files changed: +127 −3 lines changed

openhands-sdk/openhands/sdk/llm/llm.py

Lines changed: 16 additions & 1 deletion

```diff
@@ -518,7 +518,22 @@ def _one_attempt(**retry_kwargs) -> ModelResponse:
             self._telemetry.on_request(log_ctx=log_ctx)
             # Merge retry-modified kwargs (like temperature) with call_kwargs
             final_kwargs = {**call_kwargs, **retry_kwargs}
-            resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+            try:
+                resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+            except InternalServerError as e:
+                # litellm sometimes raises InternalServerError when it receives
+                # a malformed response from the provider (e.g., choices=None).
+                # This happens in convert_to_model_response_object when
+                # validating response_object["choices"]. We convert this specific
+                # error to LLMNoResponseError so that temperature bumping is
+                # triggered on retry, which can help avoid the same provider bug.
+                error_msg = str(e).lower()
+                if "convert_to_model_response_object" in error_msg:
+                    raise LLMNoResponseError(
+                        f"Provider returned malformed response: {e}"
+                    ) from e
+                raise
+
             raw_resp: ModelResponse | None = None
             if use_mock_tools:
                 raw_resp = copy.deepcopy(resp)
```
tests/sdk/llm/test_api_connection_error_retry.py

Lines changed: 111 additions & 2 deletions

```diff
@@ -1,12 +1,12 @@
 from unittest.mock import patch
 
 import pytest
-from litellm.exceptions import APIConnectionError
+from litellm.exceptions import APIConnectionError, InternalServerError
 from litellm.types.utils import Choices, Message as LiteLLMMessage, ModelResponse, Usage
 from pydantic import SecretStr
 
 from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent
-from openhands.sdk.llm.exceptions import LLMServiceUnavailableError
+from openhands.sdk.llm.exceptions import LLMNoResponseError, LLMServiceUnavailableError
 
 
 def create_mock_response(content: str = "Test response", response_id: str = "test-id"):
@@ -255,3 +255,112 @@ def retry_listener(attempt: int, max_attempts: int):
         assert isinstance(max_attempts, int)
         assert attempt >= 1
         assert max_attempts == default_config.num_retries
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_retries_with_temperature_bump(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that InternalServerError from convert_to_model_response_object
+    is converted to LLMNoResponseError and retried with temperature bump.
+    """
+    # Ensure we start at 0.0 to trigger bump to 1.0 on retry
+    assert default_config.temperature == 0.0
+
+    mock_litellm_completion.side_effect = [
+        InternalServerError(
+            message=(
+                "Invalid response object Traceback (most recent call last):\n"
+                '  File "litellm/litellm_core_utils/llm_response_utils/'
+                'convert_dict_to_response.py", line 466, in '
+                "convert_to_model_response_object\n"
+                '    assert response_object["choices"] is not None\n'
+                "AssertionError"
+            ),
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        create_mock_response("success"),
+    ]
+
+    response = default_config.completion(
+        messages=[Message(role="user", content=[TextContent(text="hi")])]
+    )
+
+    assert isinstance(response, LLMResponse)
+    assert response.message is not None
+    assert mock_litellm_completion.call_count == 2
+
+    # Verify that on the second call, temperature was bumped to 1.0
+    _, second_kwargs = mock_litellm_completion.call_args_list[1]
+    assert second_kwargs.get("temperature") == 1.0
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_exhausts_retries(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that when all retries fail with InternalServerError from
+    convert_to_model_response_object, LLMNoResponseError is raised.
+    """
+    mock_litellm_completion.side_effect = [
+        InternalServerError(
+            message=(
+                "File convert_to_model_response_object: "
+                "assert response_object['choices'] is not None"
+            ),
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        InternalServerError(
+            message=(
+                "File convert_to_model_response_object: "
+                "assert response_object['choices'] is not None"
+            ),
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+    ]
+
+    with pytest.raises(LLMNoResponseError) as excinfo:
+        default_config.completion(
+            messages=[Message(role="user", content=[TextContent(text="hi")])]
+        )
+
+    assert mock_litellm_completion.call_count == default_config.num_retries
+    assert "malformed response" in str(excinfo.value).lower()
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_unrelated_not_converted(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that unrelated InternalServerError (not about choices) is NOT
+    converted to LLMNoResponseError.
+    """
+    mock_litellm_completion.side_effect = [
+        InternalServerError(
+            message="Database connection failed",
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        InternalServerError(
+            message="Database connection failed",
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+    ]
+
+    # Should raise InternalServerError (mapped to LLMServiceUnavailableError),
+    # not LLMNoResponseError
+    with pytest.raises(Exception) as excinfo:
+        default_config.completion(
+            messages=[Message(role="user", content=[TextContent(text="hi")])]
+        )
+
+    # Should NOT be LLMNoResponseError
+    assert not isinstance(excinfo.value, LLMNoResponseError)
+    assert mock_litellm_completion.call_count == default_config.num_retries
```
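To run just these cases locally, pytest's -k filter should select them, e.g. `pytest tests/sdk/llm/test_api_connection_error_retry.py -k internal_server_error` (assuming a standard pytest setup for the repo).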
