
Commit f5a032c

Handle InternalServerError with choices=None to enable temperature bumping
Some LLM providers (e.g., Gemini) occasionally return malformed responses with choices=None. When this happens, litellm may raise InternalServerError during its internal validation, before the response ever reaches our code. Previously these InternalServerErrors were retried, but without temperature bumping; since temperature=0.0 is deterministic, every retry hit the same provider bug and failed.

This change detects InternalServerErrors related to malformed choices and converts them to LLMNoResponseError. That enables the RetryMixin's adaptive behavior, which bumps temperature from 0.0 to 1.0 on retry. The temperature change makes the provider's response generation non-deterministic, which can help avoid the same bug on subsequent attempts.

This complements PR #1107, which handles the case where litellm successfully returns a response with empty choices. Together, these changes ensure temperature bumping happens regardless of where the choices validation fails.

Co-authored-by: openhands <openhands@all-hands.dev>
1 parent 9652de4 commit f5a032c
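
The RetryMixin logic the commit message refers to is not part of this diff. As a rough sketch of its assumed shape (not the actual implementation; call_with_adaptive_retry and attempt_fn are illustrative names, with attempt_fn standing in for one completion attempt), the adaptive behavior amounts to:

from openhands.sdk.llm.exceptions import LLMNoResponseError


def call_with_adaptive_retry(attempt_fn, num_retries: int, temperature: float = 0.0):
    # Sketch only: retry on LLMNoResponseError, bumping temperature from
    # 0.0 to 1.0 so the next attempt is no longer deterministic.
    kwargs = {"temperature": temperature}
    for attempt in range(num_retries):
        try:
            return attempt_fn(**kwargs)
        except LLMNoResponseError:
            if attempt == num_retries - 1:
                raise
            if kwargs["temperature"] == 0.0:
                kwargs["temperature"] = 1.0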

File tree

2 files changed (+181, -1 lines)

openhands-sdk/openhands/sdk/llm/llm.py

Lines changed: 20 additions & 1 deletion
@@ -518,7 +518,26 @@ def _one_attempt(**retry_kwargs) -> ModelResponse:
             self._telemetry.on_request(log_ctx=log_ctx)
             # Merge retry-modified kwargs (like temperature) with call_kwargs
             final_kwargs = {**call_kwargs, **retry_kwargs}
-            resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+            try:
+                resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+            except InternalServerError as e:
+                # litellm sometimes raises InternalServerError when it receives
+                # a malformed response from the provider (e.g., choices=None).
+                # In these cases, the error typically contains "choices" and
+                # validation-related keywords. We convert these to
+                # LLMNoResponseError so that temperature bumping is triggered on
+                # retry, which can help avoid the same provider bug.
+                error_msg = str(e).lower()
+                if "choices" in error_msg and (
+                    "none" in error_msg
+                    or "assert" in error_msg
+                    or "invalid" in error_msg
+                ):
+                    raise LLMNoResponseError(
+                        f"Provider returned malformed response: {e}"
+                    ) from e
+                raise
+
             raw_resp: ModelResponse | None = None
             if use_mock_tools:
                 raw_resp = copy.deepcopy(resp)
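
Pulled out for illustration, the matching condition above reduces to the following hypothetical helper (is_malformed_choices_error is not part of the diff); the sample messages come from the tests below:

def is_malformed_choices_error(message: str) -> bool:
    # Mirrors the condition in _one_attempt: "choices" plus a
    # validation-related keyword signals a malformed provider response.
    m = message.lower()
    return "choices" in m and ("none" in m or "assert" in m or "invalid" in m)


# Converted to LLMNoResponseError:
assert is_malformed_choices_error(
    "Invalid response object: assert response_object['choices'] is not None"
)
assert is_malformed_choices_error("Invalid response: choices is None")

# Re-raised unchanged (unrelated 500):
assert not is_malformed_choices_error("Database connection failed")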
Lines changed: 161 additions & 0 deletions
@@ -0,0 +1,161 @@
+from unittest.mock import patch
+
+import pytest
+from litellm.exceptions import InternalServerError
+from litellm.types.utils import Choices, Message as LiteLLMMessage, ModelResponse, Usage
+from pydantic import SecretStr
+
+from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent
+from openhands.sdk.llm.exceptions import LLMNoResponseError
+
+
+def create_mock_response(
+    content: str = "ok", response_id: str = "r-1"
+) -> ModelResponse:
+    return ModelResponse(
+        id=response_id,
+        choices=[
+            Choices(
+                finish_reason="stop",
+                index=0,
+                message=LiteLLMMessage(content=content, role="assistant"),
+            )
+        ],
+        created=1,
+        model="gpt-4o",
+        object="chat.completion",
+        system_fingerprint="t",
+        usage=Usage(prompt_tokens=1, completion_tokens=1, total_tokens=2),
+    )
+
+
+@pytest.fixture
+def base_llm() -> LLM:
+    return LLM(
+        usage_id="test-llm",
+        model="gpt-4o",
+        api_key=SecretStr("test_key"),
+        num_retries=2,
+        retry_min_wait=1,
+        retry_max_wait=2,
+    )
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_retries_then_succeeds(
+    mock_completion, base_llm: LLM
+) -> None:
+    """
+    Test that InternalServerError with choices=None in its message is
+    converted to LLMNoResponseError and retried successfully with a
+    temperature bump.
+    """
+    mock_completion.side_effect = [
+        InternalServerError(
+            message=(
+                "Invalid response object: assert response_object['choices'] is not None"
+            ),
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        create_mock_response("success"),
+    ]
+
+    resp = base_llm.completion(
+        messages=[Message(role="user", content=[TextContent(text="hi")])]
+    )
+
+    assert isinstance(resp, LLMResponse)
+    assert resp.message is not None
+    assert mock_completion.call_count == 2
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_exhausts_retries(
+    mock_completion, base_llm: LLM
+) -> None:
+    """
+    Test that when all retries fail with InternalServerError (choices=None),
+    LLMNoResponseError is raised.
+    """
+    mock_completion.side_effect = [
+        InternalServerError(
+            message="Invalid response: choices is None",
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        InternalServerError(
+            message="Invalid response: choices is None",
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+    ]
+
+    with pytest.raises(LLMNoResponseError) as excinfo:
+        base_llm.completion(
+            messages=[Message(role="user", content=[TextContent(text="hi")])]
+        )
+
+    assert mock_completion.call_count == base_llm.num_retries
+    assert "malformed response" in str(excinfo.value).lower()
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_bumps_temperature(
+    mock_completion, base_llm: LLM
+) -> None:
+    """
+    Test that InternalServerError with choices=None triggers a temperature bump.
+    """
+    # Ensure we start at 0.0 to trigger the bump to 1.0 on retry
+    assert base_llm.temperature == 0.0
+
+    mock_completion.side_effect = [
+        InternalServerError(
+            message="assert response_object['choices'] is not None",
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        create_mock_response("ok"),
+    ]
+
+    base_llm.completion(
+        messages=[Message(role="user", content=[TextContent(text="hi")])]
+    )
+
+    # Verify that on the second call, temperature was bumped to 1.0 by RetryMixin
+    assert mock_completion.call_count == 2
+    _, second_kwargs = mock_completion.call_args_list[1]
+    assert second_kwargs.get("temperature") == 1.0
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_unrelated_not_converted(
+    mock_completion, base_llm: LLM
+) -> None:
+    """
+    Test that an unrelated InternalServerError (not about choices) is NOT
+    converted to LLMNoResponseError and is retried as InternalServerError.
+    """
+    mock_completion.side_effect = [
+        InternalServerError(
+            message="Database connection failed",
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        InternalServerError(
+            message="Database connection failed",
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+    ]
+
+    # Should raise InternalServerError eventually (after mapping to
+    # LLMServiceUnavailableError), not LLMNoResponseError
+    with pytest.raises(Exception) as excinfo:
+        base_llm.completion(
+            messages=[Message(role="user", content=[TextContent(text="hi")])]
+        )
+
+    # Should NOT be LLMNoResponseError
+    assert not isinstance(excinfo.value, LLMNoResponseError)
+    assert mock_completion.call_count == base_llm.num_retries
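
For callers, the upshot is a single exception type covering both malformed-response paths. A hedged usage sketch (constructor arguments borrowed from the test fixture above; the logging strategy is illustrative, not prescribed by the SDK):

import logging

from pydantic import SecretStr

from openhands.sdk.llm import LLM, Message, TextContent
from openhands.sdk.llm.exceptions import LLMNoResponseError

log = logging.getLogger(__name__)

llm = LLM(usage_id="example-llm", model="gpt-4o", api_key=SecretStr("..."), num_retries=2)

try:
    response = llm.completion(
        messages=[Message(role="user", content=[TextContent(text="hi")])]
    )
except LLMNoResponseError as exc:
    # Raised after retries are exhausted, whether litellm failed its own
    # validation (this commit) or returned empty choices (PR #1107).
    log.warning("No usable response after retries: %s", exc)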
