1 change: 1 addition & 0 deletions openhands-sdk/openhands/sdk/llm/llm.py
@@ -522,6 +522,7 @@ def _one_attempt(**retry_kwargs) -> ModelResponse:
             # Merge retry-modified kwargs (like temperature) with call_kwargs
             final_kwargs = {**call_kwargs, **retry_kwargs}
             resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+
             raw_resp: ModelResponse | None = None
             if use_mock_tools:
                 raw_resp = copy.deepcopy(resp)
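Note: the temperature bump depends on tenacity re-invoking the wrapped function with the same kwargs dict it exposes as retry_state.kwargs, which is what the merge above picks up on the next attempt. A minimal stand-alone sketch of that mechanism (illustrative names, not SDK code; only tenacity is assumed):

from tenacity import RetryCallState, Retrying, stop_after_attempt

def bump_temperature(retry_state: RetryCallState) -> None:
    # Mutating retry_state.kwargs changes what the next attempt receives.
    kwargs = retry_state.kwargs
    if isinstance(kwargs, dict) and kwargs.get("temperature", 0) == 0:
        kwargs["temperature"] = 1.0

seen: list[float] = []

def flaky_call(temperature: float = 0.0) -> str:
    seen.append(temperature)
    raise RuntimeError("simulated empty-choices failure")

retryer = Retrying(
    stop=stop_after_attempt(2), before_sleep=bump_temperature, reraise=True
)
try:
    retryer(flaky_call, temperature=0.0)
except RuntimeError:
    pass
print(seen)  # [0.0, 1.0]: the second attempt saw the bumped temperature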
41 changes: 34 additions & 7 deletions openhands-sdk/openhands/sdk/llm/utils/retry_mixin.py
@@ -1,6 +1,7 @@
 from collections.abc import Callable, Iterable
 from typing import Any, cast
 
+from litellm.exceptions import InternalServerError
 from tenacity import (
     RetryCallState,
     retry,
@@ -19,6 +20,14 @@
 RetryListener = Callable[[int, int], None]
 
 
+def _looks_like_choices_none_error(msg: str) -> bool:
+    m = msg.lower()
+    if "choices" not in m:
+        return False
+    # Heuristics matching LiteLLM response conversion assertions/validation
+    return any(k in m for k in ("none", "assert", "invalid"))
+
+
 class RetryMixin:
     """Mixin class for retry logic."""
 
@@ -50,24 +59,28 @@ def before_sleep(retry_state: RetryCallState) -> None:
             if exc is None:
                 return
 
-            # Only adjust temperature for LLMNoResponseError
-            if isinstance(exc, LLMNoResponseError):
+            # Adjust temperature for LLMNoResponseError or certain InternalServerError
+            should_bump = isinstance(exc, LLMNoResponseError) or (
+                isinstance(exc, InternalServerError)
+                and _looks_like_choices_none_error(str(exc))
+            )
+            if should_bump:
                 kwargs = getattr(retry_state, "kwargs", None)
                 if isinstance(kwargs, dict):
                     current_temp = kwargs.get("temperature", 0)
                     if current_temp == 0:
                         kwargs["temperature"] = 1.0
                         logger.warning(
-                            "LLMNoResponseError with temperature=0, "
+                            "LLMNoResponse-like error with temperature=0, "
                             "setting temperature to 1.0 for next attempt."
                         )
                     else:
                         logger.warning(
-                            f"LLMNoResponseError with temperature={current_temp}, "
-                            "keeping original temperature"
+                            "LLMNoResponse-like error with temperature="
+                            f"{current_temp}, keeping original temperature"
                         )
 
-        retry_decorator: Callable[[Callable[..., Any]], Callable[..., Any]] = retry(
+        tenacity_decorator: Callable[[Callable[..., Any]], Callable[..., Any]] = retry(
             before_sleep=before_sleep,
             stop=stop_after_attempt(num_retries),
             reraise=True,
@@ -78,7 +91,21 @@ def before_sleep(retry_state: RetryCallState) -> None:
                 max=retry_max_wait,
             ),
         )
-        return retry_decorator
+
+        def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+            def wrapped(*args: Any, **kwargs: Any) -> Any:
+                try:
+                    return func(*args, **kwargs)
+                except InternalServerError as e:
+                    if _looks_like_choices_none_error(str(e)):
+                        raise LLMNoResponseError(
+                            f"Provider returned malformed response: {e}"
+                        ) from e
+                    raise
+
+            return tenacity_decorator(wrapped)
+
+        return decorator
 
     def log_retry_attempt(self, retry_state: RetryCallState) -> None:
         """Log retry attempts."""
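Note: the mixin now composes two layers: an inner wrapper that re-raises a matching InternalServerError as LLMNoResponseError, and the tenacity policy outside it, so before_sleep only ever sees the converted type. A stand-alone sketch of that composition (LLMNoResponseError is stubbed here, the retry policy is simplified to one exception type; only litellm and tenacity are assumed):

from litellm.exceptions import InternalServerError
from tenacity import retry, retry_if_exception_type, stop_after_attempt

class LLMNoResponseError(Exception):
    # Stand-in for openhands.sdk.llm.exceptions.LLMNoResponseError
    pass

def looks_like_choices_none(msg: str) -> bool:
    m = msg.lower()
    return "choices" in m and any(k in m for k in ("none", "assert", "invalid"))

def convert_malformed(func):
    # Inner layer: translate the matched provider error before tenacity sees it.
    def wrapped(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except InternalServerError as e:
            if looks_like_choices_none(str(e)):
                raise LLMNoResponseError(
                    f"Provider returned malformed response: {e}"
                ) from e
            raise
    return wrapped

@retry(
    retry=retry_if_exception_type(LLMNoResponseError),
    stop=stop_after_attempt(2),
    reraise=True,
)
@convert_malformed
def call_provider() -> str:
    # Always fails with the LiteLLM-style assertion message for demo purposes.
    raise InternalServerError(
        message='assert response_object["choices"] is not None',
        llm_provider="demo",
        model="demo-model",
    )

try:
    call_provider()
except LLMNoResponseError as e:
    print(f"retried, then raised: {e}")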
113 changes: 111 additions & 2 deletions tests/sdk/llm/test_api_connection_error_retry.py
@@ -1,12 +1,12 @@
 from unittest.mock import patch
 
 import pytest
-from litellm.exceptions import APIConnectionError
+from litellm.exceptions import APIConnectionError, InternalServerError
 from litellm.types.utils import Choices, Message as LiteLLMMessage, ModelResponse, Usage
 from pydantic import SecretStr
 
 from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent
-from openhands.sdk.llm.exceptions import LLMServiceUnavailableError
+from openhands.sdk.llm.exceptions import LLMNoResponseError, LLMServiceUnavailableError
 
 
 def create_mock_response(content: str = "Test response", response_id: str = "test-id"):
@@ -255,3 +255,112 @@ def retry_listener(attempt: int, max_attempts: int):
         assert isinstance(max_attempts, int)
         assert attempt >= 1
         assert max_attempts == default_config.num_retries
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_retries_with_temperature_bump(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that InternalServerError from convert_to_model_response_object
+    is converted to LLMNoResponseError and retried with temperature bump.
+    """
+    # Ensure we start at 0.0 to trigger bump to 1.0 on retry
+    assert default_config.temperature == 0.0
+
+    mock_litellm_completion.side_effect = [
+        InternalServerError(
+            message=(
+                "Invalid response object Traceback (most recent call last):\n"
+                '  File "litellm/litellm_core_utils/llm_response_utils/'
+                'convert_dict_to_response.py", line 466, in '
+                "convert_to_model_response_object\n"
+                '    assert response_object["choices"] is not None\n'
+                "AssertionError"
+            ),
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        create_mock_response("success"),
+    ]
+
+    response = default_config.completion(
+        messages=[Message(role="user", content=[TextContent(text="hi")])]
+    )
+
+    assert isinstance(response, LLMResponse)
+    assert response.message is not None
+    assert mock_litellm_completion.call_count == 2
+
+    # Verify that on the second call, temperature was bumped to 1.0
+    _, second_kwargs = mock_litellm_completion.call_args_list[1]
+    assert second_kwargs.get("temperature") == 1.0
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_exhausts_retries(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that when all retries fail with InternalServerError from
+    convert_to_model_response_object, LLMNoResponseError is raised.
+    """
+    mock_litellm_completion.side_effect = [
+        InternalServerError(
+            message=(
+                "File convert_to_model_response_object: "
+                "assert response_object['choices'] is not None"
+            ),
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        InternalServerError(
+            message=(
+                "File convert_to_model_response_object: "
+                "assert response_object['choices'] is not None"
+            ),
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+    ]
+
+    with pytest.raises(LLMNoResponseError) as excinfo:
+        default_config.completion(
+            messages=[Message(role="user", content=[TextContent(text="hi")])]
+        )
+
+    assert mock_litellm_completion.call_count == default_config.num_retries
+    assert "malformed response" in str(excinfo.value).lower()
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_unrelated_not_converted(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that unrelated InternalServerError (not about choices) is NOT
+    converted to LLMNoResponseError.
+    """
+    mock_litellm_completion.side_effect = [
+        InternalServerError(
+            message="Database connection failed",
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        InternalServerError(
+            message="Database connection failed",
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+    ]
+
+    # Should raise InternalServerError (mapped to LLMServiceUnavailableError),
+    # not LLMNoResponseError
+    with pytest.raises(Exception) as excinfo:
+        default_config.completion(
+            messages=[Message(role="user", content=[TextContent(text="hi")])]
+        )
+
+    # Should NOT be LLMNoResponseError
+    assert not isinstance(excinfo.value, LLMNoResponseError)
+    assert mock_litellm_completion.call_count == default_config.num_retries
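Note: assuming a standard pytest setup for this repository, the new cases can be exercised in isolation with something like the following (the -k filter is illustrative):

pytest tests/sdk/llm/test_api_connection_error_retry.py -k internal_server_error -q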