diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py index b86555323..3fefdaab9 100644 --- a/openhands-sdk/openhands/sdk/llm/llm.py +++ b/openhands-sdk/openhands/sdk/llm/llm.py @@ -522,6 +522,7 @@ def _one_attempt(**retry_kwargs) -> ModelResponse: # Merge retry-modified kwargs (like temperature) with call_kwargs final_kwargs = {**call_kwargs, **retry_kwargs} resp = self._transport_call(messages=formatted_messages, **final_kwargs) + raw_resp: ModelResponse | None = None if use_mock_tools: raw_resp = copy.deepcopy(resp) diff --git a/openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py b/openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py index 11b025d28..d0a2fffd1 100644 --- a/openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py +++ b/openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py @@ -1,6 +1,7 @@ from collections.abc import Callable, Iterable from typing import Any, cast +from litellm.exceptions import InternalServerError from tenacity import ( RetryCallState, retry, @@ -19,6 +20,14 @@ RetryListener = Callable[[int, int], None] +def _looks_like_choices_none_error(msg: str) -> bool: + m = msg.lower() + if "choices" not in m: + return False + # Heuristics matching LiteLLM response conversion assertions/validation + return any(k in m for k in ("none", "assert", "invalid")) + + class RetryMixin: """Mixin class for retry logic.""" @@ -50,24 +59,28 @@ def before_sleep(retry_state: RetryCallState) -> None: if exc is None: return - # Only adjust temperature for LLMNoResponseError - if isinstance(exc, LLMNoResponseError): + # Adjust temperature for LLMNoResponseError or certain InternalServerError + should_bump = isinstance(exc, LLMNoResponseError) or ( + isinstance(exc, InternalServerError) + and _looks_like_choices_none_error(str(exc)) + ) + if should_bump: kwargs = getattr(retry_state, "kwargs", None) if isinstance(kwargs, dict): current_temp = kwargs.get("temperature", 0) if current_temp == 0: kwargs["temperature"] = 1.0 logger.warning( - "LLMNoResponseError with temperature=0, " + "LLMNoResponse-like error with temperature=0, " "setting temperature to 1.0 for next attempt." ) else: logger.warning( - f"LLMNoResponseError with temperature={current_temp}, " - "keeping original temperature" + "LLMNoResponse-like error with temperature=" + f"{current_temp}, keeping original temperature" ) - retry_decorator: Callable[[Callable[..., Any]], Callable[..., Any]] = retry( + tenacity_decorator: Callable[[Callable[..., Any]], Callable[..., Any]] = retry( before_sleep=before_sleep, stop=stop_after_attempt(num_retries), reraise=True, @@ -78,7 +91,21 @@ def before_sleep(retry_state: RetryCallState) -> None: max=retry_max_wait, ), ) - return retry_decorator + + def decorator(func: Callable[..., Any]) -> Callable[..., Any]: + def wrapped(*args: Any, **kwargs: Any) -> Any: + try: + return func(*args, **kwargs) + except InternalServerError as e: + if _looks_like_choices_none_error(str(e)): + raise LLMNoResponseError( + f"Provider returned malformed response: {e}" + ) from e + raise + + return tenacity_decorator(wrapped) + + return decorator def log_retry_attempt(self, retry_state: RetryCallState) -> None: """Log retry attempts.""" diff --git a/tests/sdk/llm/test_api_connection_error_retry.py b/tests/sdk/llm/test_api_connection_error_retry.py index 315490d18..c3e1bc3ce 100644 --- a/tests/sdk/llm/test_api_connection_error_retry.py +++ b/tests/sdk/llm/test_api_connection_error_retry.py @@ -1,12 +1,12 @@ from unittest.mock import patch import pytest -from litellm.exceptions import APIConnectionError +from litellm.exceptions import APIConnectionError, InternalServerError from litellm.types.utils import Choices, Message as LiteLLMMessage, ModelResponse, Usage from pydantic import SecretStr from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent -from openhands.sdk.llm.exceptions import LLMServiceUnavailableError +from openhands.sdk.llm.exceptions import LLMNoResponseError, LLMServiceUnavailableError def create_mock_response(content: str = "Test response", response_id: str = "test-id"): @@ -255,3 +255,112 @@ def retry_listener(attempt: int, max_attempts: int): assert isinstance(max_attempts, int) assert attempt >= 1 assert max_attempts == default_config.num_retries + + +@patch("openhands.sdk.llm.llm.litellm_completion") +def test_internal_server_error_choices_none_retries_with_temperature_bump( + mock_litellm_completion, default_config +): + """ + Test that InternalServerError from convert_to_model_response_object + is converted to LLMNoResponseError and retried with temperature bump. + """ + # Ensure we start at 0.0 to trigger bump to 1.0 on retry + assert default_config.temperature == 0.0 + + mock_litellm_completion.side_effect = [ + InternalServerError( + message=( + "Invalid response object Traceback (most recent call last):\n" + ' File "litellm/litellm_core_utils/llm_response_utils/' + 'convert_dict_to_response.py", line 466, in ' + "convert_to_model_response_object\n" + ' assert response_object["choices"] is not None\n' + "AssertionError" + ), + llm_provider="test_provider", + model="test_model", + ), + create_mock_response("success"), + ] + + response = default_config.completion( + messages=[Message(role="user", content=[TextContent(text="hi")])] + ) + + assert isinstance(response, LLMResponse) + assert response.message is not None + assert mock_litellm_completion.call_count == 2 + + # Verify that on the second call, temperature was bumped to 1.0 + _, second_kwargs = mock_litellm_completion.call_args_list[1] + assert second_kwargs.get("temperature") == 1.0 + + +@patch("openhands.sdk.llm.llm.litellm_completion") +def test_internal_server_error_choices_none_exhausts_retries( + mock_litellm_completion, default_config +): + """ + Test that when all retries fail with InternalServerError from + convert_to_model_response_object, LLMNoResponseError is raised. + """ + mock_litellm_completion.side_effect = [ + InternalServerError( + message=( + "File convert_to_model_response_object: " + "assert response_object['choices'] is not None" + ), + llm_provider="test_provider", + model="test_model", + ), + InternalServerError( + message=( + "File convert_to_model_response_object: " + "assert response_object['choices'] is not None" + ), + llm_provider="test_provider", + model="test_model", + ), + ] + + with pytest.raises(LLMNoResponseError) as excinfo: + default_config.completion( + messages=[Message(role="user", content=[TextContent(text="hi")])] + ) + + assert mock_litellm_completion.call_count == default_config.num_retries + assert "malformed response" in str(excinfo.value).lower() + + +@patch("openhands.sdk.llm.llm.litellm_completion") +def test_internal_server_error_unrelated_not_converted( + mock_litellm_completion, default_config +): + """ + Test that unrelated InternalServerError (not about choices) is NOT + converted to LLMNoResponseError. + """ + mock_litellm_completion.side_effect = [ + InternalServerError( + message="Database connection failed", + llm_provider="test_provider", + model="test_model", + ), + InternalServerError( + message="Database connection failed", + llm_provider="test_provider", + model="test_model", + ), + ] + + # Should raise InternalServerError (mapped to LLMServiceUnavailableError), + # not LLMNoResponseError + with pytest.raises(Exception) as excinfo: + default_config.completion( + messages=[Message(role="user", content=[TextContent(text="hi")])] + ) + + # Should NOT be LLMNoResponseError + assert not isinstance(excinfo.value, LLMNoResponseError) + assert mock_litellm_completion.call_count == default_config.num_retries