From 114075654c74a1f3a0038195e5f886b701d8cb96 Mon Sep 17 00:00:00 2001
From: openhands
Date: Mon, 10 Nov 2025 14:33:17 +0000
Subject: [PATCH 1/2] Handle InternalServerError with choices=None to enable
 temperature bumping

Some LLM providers occasionally return malformed responses with
choices=None. When this happens, litellm raises InternalServerError
during internal validation in convert_to_model_response_object before
returning the response to our code.

Previously, these InternalServerErrors were retried, but without
temperature bumping. Since temperature=0.0 is deterministic, all retries
would hit the same provider bug and fail.

This change detects InternalServerErrors from
convert_to_model_response_object and converts them to LLMNoResponseError.
This enables the RetryMixin's adaptive behavior, which bumps temperature
from 0.0 to 1.0 on retry. The temperature change makes the provider's
response generation non-deterministic, which can help avoid the same bug
on subsequent attempts.

The detection checks for 'convert_to_model_response_object' in the error
message, which is the specific function where litellm validates the
response_object['choices'] field.

This complements PR #1107, which handles the case where litellm
successfully returns a response with empty choices. Together, these
changes ensure temperature bumping happens regardless of where the
choices validation fails.

Co-authored-by: openhands
---
 openhands-sdk/openhands/sdk/llm/llm.py        |  17 ++-
 .../llm/test_api_connection_error_retry.py    | 113 +++++++++++++++++-
 2 files changed, 127 insertions(+), 3 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py
index ff6afd4299..1e00d579d1 100644
--- a/openhands-sdk/openhands/sdk/llm/llm.py
+++ b/openhands-sdk/openhands/sdk/llm/llm.py
@@ -518,7 +518,22 @@ def _one_attempt(**retry_kwargs) -> ModelResponse:
             self._telemetry.on_request(log_ctx=log_ctx)
             # Merge retry-modified kwargs (like temperature) with call_kwargs
             final_kwargs = {**call_kwargs, **retry_kwargs}
-            resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+            try:
+                resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+            except InternalServerError as e:
+                # litellm sometimes raises InternalServerError when it receives
+                # a malformed response from the provider (e.g., choices=None).
+                # This happens in convert_to_model_response_object when
+                # validating response_object["choices"]. We convert this specific
+                # error to LLMNoResponseError so that temperature bumping is
+                # triggered on retry, which can help avoid the same provider bug.
+                error_msg = str(e).lower()
+                if "convert_to_model_response_object" in error_msg:
+                    raise LLMNoResponseError(
+                        f"Provider returned malformed response: {e}"
+                    ) from e
+                raise
+
             raw_resp: ModelResponse | None = None
             if use_mock_tools:
                 raw_resp = copy.deepcopy(resp)
diff --git a/tests/sdk/llm/test_api_connection_error_retry.py b/tests/sdk/llm/test_api_connection_error_retry.py
index 315490d18f..c3e1bc3ce0 100644
--- a/tests/sdk/llm/test_api_connection_error_retry.py
+++ b/tests/sdk/llm/test_api_connection_error_retry.py
@@ -1,12 +1,12 @@
 from unittest.mock import patch
 
 import pytest
-from litellm.exceptions import APIConnectionError
+from litellm.exceptions import APIConnectionError, InternalServerError
 from litellm.types.utils import Choices, Message as LiteLLMMessage, ModelResponse, Usage
 from pydantic import SecretStr
 
 from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent
-from openhands.sdk.llm.exceptions import LLMServiceUnavailableError
+from openhands.sdk.llm.exceptions import LLMNoResponseError, LLMServiceUnavailableError
 
 
 def create_mock_response(content: str = "Test response", response_id: str = "test-id"):
@@ -255,3 +255,112 @@ def retry_listener(attempt: int, max_attempts: int):
         assert isinstance(max_attempts, int)
         assert attempt >= 1
         assert max_attempts == default_config.num_retries
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_retries_with_temperature_bump(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that InternalServerError from convert_to_model_response_object
+    is converted to LLMNoResponseError and retried with temperature bump.
+    """
+    # Ensure we start at 0.0 to trigger bump to 1.0 on retry
+    assert default_config.temperature == 0.0
+
+    mock_litellm_completion.side_effect = [
+        InternalServerError(
+            message=(
+                "Invalid response object Traceback (most recent call last):\n"
+                '  File "litellm/litellm_core_utils/llm_response_utils/'
+                'convert_dict_to_response.py", line 466, in '
+                "convert_to_model_response_object\n"
+                '    assert response_object["choices"] is not None\n'
+                "AssertionError"
+            ),
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        create_mock_response("success"),
+    ]
+
+    response = default_config.completion(
+        messages=[Message(role="user", content=[TextContent(text="hi")])]
+    )
+
+    assert isinstance(response, LLMResponse)
+    assert response.message is not None
+    assert mock_litellm_completion.call_count == 2
+
+    # Verify that on the second call, temperature was bumped to 1.0
+    _, second_kwargs = mock_litellm_completion.call_args_list[1]
+    assert second_kwargs.get("temperature") == 1.0
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_exhausts_retries(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that when all retries fail with InternalServerError from
+    convert_to_model_response_object, LLMNoResponseError is raised.
+ """ + mock_litellm_completion.side_effect = [ + InternalServerError( + message=( + "File convert_to_model_response_object: " + "assert response_object['choices'] is not None" + ), + llm_provider="test_provider", + model="test_model", + ), + InternalServerError( + message=( + "File convert_to_model_response_object: " + "assert response_object['choices'] is not None" + ), + llm_provider="test_provider", + model="test_model", + ), + ] + + with pytest.raises(LLMNoResponseError) as excinfo: + default_config.completion( + messages=[Message(role="user", content=[TextContent(text="hi")])] + ) + + assert mock_litellm_completion.call_count == default_config.num_retries + assert "malformed response" in str(excinfo.value).lower() + + +@patch("openhands.sdk.llm.llm.litellm_completion") +def test_internal_server_error_unrelated_not_converted( + mock_litellm_completion, default_config +): + """ + Test that unrelated InternalServerError (not about choices) is NOT + converted to LLMNoResponseError. + """ + mock_litellm_completion.side_effect = [ + InternalServerError( + message="Database connection failed", + llm_provider="test_provider", + model="test_model", + ), + InternalServerError( + message="Database connection failed", + llm_provider="test_provider", + model="test_model", + ), + ] + + # Should raise InternalServerError (mapped to LLMServiceUnavailableError), + # not LLMNoResponseError + with pytest.raises(Exception) as excinfo: + default_config.completion( + messages=[Message(role="user", content=[TextContent(text="hi")])] + ) + + # Should NOT be LLMNoResponseError + assert not isinstance(excinfo.value, LLMNoResponseError) + assert mock_litellm_completion.call_count == default_config.num_retries From a71b56cc57f9aa2200c494a5fbdcb264e454743f Mon Sep 17 00:00:00 2001 From: enyst Date: Mon, 10 Nov 2025 17:01:04 +0000 Subject: [PATCH 2/2] llm(retry): move InternalServerError(choices=None) handling into RetryMixin - Convert specific InternalServerError from LiteLLM response conversion to LLMNoResponseError in retry wrapper - Bump temperature for this case in before_sleep just like LLMNoResponseError - Remove ad-hoc try/except from LLM.completion path so logic is centralized This ensures the retry path handles these errors consistently and enables temperature bumping. Co-authored-by: openhands --- openhands-sdk/openhands/sdk/llm/llm.py | 16 +------- .../openhands/sdk/llm/utils/retry_mixin.py | 41 +++++++++++++++---- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py index e4e6cea958..3fefdaab99 100644 --- a/openhands-sdk/openhands/sdk/llm/llm.py +++ b/openhands-sdk/openhands/sdk/llm/llm.py @@ -521,21 +521,7 @@ def _one_attempt(**retry_kwargs) -> ModelResponse: self._telemetry.on_request(log_ctx=log_ctx) # Merge retry-modified kwargs (like temperature) with call_kwargs final_kwargs = {**call_kwargs, **retry_kwargs} - try: - resp = self._transport_call(messages=formatted_messages, **final_kwargs) - except InternalServerError as e: - # litellm sometimes raises InternalServerError when it receives - # a malformed response from the provider (e.g., choices=None). - # This happens in convert_to_model_response_object when - # validating response_object["choices"]. We convert this specific - # error to LLMNoResponseError so that temperature bumping is - # triggered on retry, which can help avoid the same provider bug. 
-                error_msg = str(e).lower()
-                if "convert_to_model_response_object" in error_msg:
-                    raise LLMNoResponseError(
-                        f"Provider returned malformed response: {e}"
-                    ) from e
-                raise
+            resp = self._transport_call(messages=formatted_messages, **final_kwargs)
 
             raw_resp: ModelResponse | None = None
             if use_mock_tools:
diff --git a/openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py b/openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py
index 11b025d283..d0a2fffd10 100644
--- a/openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py
+++ b/openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py
@@ -1,6 +1,7 @@
 from collections.abc import Callable, Iterable
 from typing import Any, cast
 
+from litellm.exceptions import InternalServerError
 from tenacity import (
     RetryCallState,
     retry,
@@ -19,6 +20,14 @@
 RetryListener = Callable[[int, int], None]
 
 
+def _looks_like_choices_none_error(msg: str) -> bool:
+    m = msg.lower()
+    if "choices" not in m:
+        return False
+    # Heuristics matching LiteLLM response conversion assertions/validation
+    return any(k in m for k in ("none", "assert", "invalid"))
+
+
 class RetryMixin:
     """Mixin class for retry logic."""
 
@@ -50,24 +59,28 @@ def before_sleep(retry_state: RetryCallState) -> None:
             if exc is None:
                 return
 
-            # Only adjust temperature for LLMNoResponseError
-            if isinstance(exc, LLMNoResponseError):
+            # Adjust temperature for LLMNoResponseError or certain InternalServerError
+            should_bump = isinstance(exc, LLMNoResponseError) or (
+                isinstance(exc, InternalServerError)
+                and _looks_like_choices_none_error(str(exc))
+            )
+            if should_bump:
                 kwargs = getattr(retry_state, "kwargs", None)
                 if isinstance(kwargs, dict):
                     current_temp = kwargs.get("temperature", 0)
                     if current_temp == 0:
                         kwargs["temperature"] = 1.0
                         logger.warning(
-                            "LLMNoResponseError with temperature=0, "
+                            "LLMNoResponse-like error with temperature=0, "
                             "setting temperature to 1.0 for next attempt."
                         )
                     else:
                         logger.warning(
-                            f"LLMNoResponseError with temperature={current_temp}, "
-                            "keeping original temperature"
+                            "LLMNoResponse-like error with temperature="
+                            f"{current_temp}, keeping original temperature"
                         )
 
-        retry_decorator: Callable[[Callable[..., Any]], Callable[..., Any]] = retry(
+        tenacity_decorator: Callable[[Callable[..., Any]], Callable[..., Any]] = retry(
             before_sleep=before_sleep,
             stop=stop_after_attempt(num_retries),
             reraise=True,
@@ -78,7 +91,21 @@ def before_sleep(retry_state: RetryCallState) -> None:
                 max=retry_max_wait,
             ),
         )
-        return retry_decorator
+
+        def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+            def wrapped(*args: Any, **kwargs: Any) -> Any:
+                try:
+                    return func(*args, **kwargs)
+                except InternalServerError as e:
+                    if _looks_like_choices_none_error(str(e)):
+                        raise LLMNoResponseError(
+                            f"Provider returned malformed response: {e}"
+                        ) from e
+                    raise
+
+            return tenacity_decorator(wrapped)
+
+        return decorator
 
     def log_retry_attempt(self, retry_state: RetryCallState) -> None:
         """Log retry attempts."""
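Note for reviewers: the end-to-end behavior introduced in PATCH 2/2 (convert the
choices=None InternalServerError into LLMNoResponseError inside the wrapper, then
bump temperature 0.0 -> 1.0 in before_sleep) can be demonstrated outside the SDK.
Below is a minimal, self-contained sketch, not SDK code: `InternalServerError` and
`LLMNoResponseError` are local stand-ins (the real litellm exception takes more
arguments), and `retry_with_temperature_bump`, `flaky_completion`, and `calls` are
illustrative names. It relies on the same tenacity detail the mixin does:
`retry_state.kwargs` is the very dict tenacity reuses to re-invoke the function,
so mutating it in `before_sleep` changes the next attempt's arguments.

```python
from collections.abc import Callable
from typing import Any

from tenacity import RetryCallState, retry, stop_after_attempt, wait_fixed


class InternalServerError(Exception):
    """Local stand-in for litellm.exceptions.InternalServerError."""


class LLMNoResponseError(Exception):
    """Local stand-in for openhands.sdk.llm.exceptions.LLMNoResponseError."""


def _looks_like_choices_none_error(msg: str) -> bool:
    # Same heuristic as the patch: "choices" plus a marker that LiteLLM's
    # response conversion emits when it trips over choices=None.
    m = msg.lower()
    if "choices" not in m:
        return False
    return any(k in m for k in ("none", "assert", "invalid"))


def retry_with_temperature_bump(num_retries: int):
    """Illustrative equivalent of RetryMixin.retry_decorator."""

    def before_sleep(retry_state: RetryCallState) -> None:
        exc = retry_state.outcome.exception() if retry_state.outcome else None
        should_bump = isinstance(exc, LLMNoResponseError) or (
            isinstance(exc, InternalServerError)
            and _looks_like_choices_none_error(str(exc))
        )
        # tenacity re-invokes the function with this same kwargs dict,
        # so mutating it here changes the next attempt's arguments.
        if should_bump and retry_state.kwargs.get("temperature", 0) == 0:
            retry_state.kwargs["temperature"] = 1.0

    tenacity_decorator = retry(
        before_sleep=before_sleep,
        stop=stop_after_attempt(num_retries),
        reraise=True,
        wait=wait_fixed(0),  # no backoff, keeps the demo instant
    )

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        def wrapped(*args: Any, **kwargs: Any) -> Any:
            try:
                return func(*args, **kwargs)
            except InternalServerError as e:
                # The conversion from PATCH 2/2: make the malformed-response
                # case look like LLMNoResponseError so retries adapt.
                if _looks_like_choices_none_error(str(e)):
                    raise LLMNoResponseError(
                        f"Provider returned malformed response: {e}"
                    ) from e
                raise

        return tenacity_decorator(wrapped)

    return decorator


calls: list[float] = []


@retry_with_temperature_bump(num_retries=2)
def flaky_completion(*, temperature: float = 0.0) -> str:
    calls.append(temperature)
    if len(calls) == 1:
        # First attempt: simulate LiteLLM failing to convert choices=None.
        raise InternalServerError(
            "convert_to_model_response_object: "
            "assert response_object['choices'] is not None"
        )
    return "ok"


assert flaky_completion() == "ok"
assert calls == [0.0, 1.0]  # second attempt ran with the bumped temperature
```

The important property is that the conversion happens inside the wrapper, before
tenacity classifies the exception, so before_sleep needs only one
LLMNoResponseError-shaped branch plus the defensive InternalServerError check.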
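If the heuristic's keyword list ever needs tuning, a table-driven test pins the
matching behavior down. A sketch, assuming the private helper stays importable
from the module the patch adds it to:

```python
import pytest

# Import path taken from the patch above; the helper is private, so this
# coupling is a deliberate trade-off for test coverage.
from openhands.sdk.llm.utils.retry_mixin import _looks_like_choices_none_error


@pytest.mark.parametrize(
    ("message", "expected"),
    [
        ("assert response_object['choices'] is not None", True),
        ("Invalid response object: choices missing", True),
        ("choices came back fine", False),  # mentions choices, no failure marker
        ("Database connection failed", False),  # unrelated 5xx
    ],
)
def test_looks_like_choices_none_error(message: str, expected: bool) -> None:
    assert _looks_like_choices_none_error(message) is expected
```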