Commit 1140756
Handle InternalServerError with choices=None to enable temperature bumping
Some LLM providers occasionally return malformed responses with choices=None. When this happens, litellm raises InternalServerError during internal validation in convert_to_model_response_object, before returning the response to our code. Previously, these InternalServerErrors were retried, but without temperature bumping. Since temperature=0.0 is deterministic, all retries would hit the same provider bug and fail.

This change detects InternalServerErrors from convert_to_model_response_object and converts them to LLMNoResponseError. This enables the RetryMixin's adaptive behavior, which bumps temperature from 0.0 to 1.0 on retry. The temperature change makes the provider's response generation non-deterministic, which can help avoid the same bug on subsequent attempts.

The detection checks for 'convert_to_model_response_object' in the error message, which is the specific function where litellm validates the response_object['choices'] field.

This complements PR #1107, which handles the case where litellm successfully returns a response with empty choices. Together, these changes ensure temperature bumping happens regardless of where the choices validation fails.

Co-authored-by: openhands <openhands@all-hands.dev>
1 parent 9652de4 commit 1140756
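The RetryMixin referenced above is not part of this diff. As a rough sketch of the adaptive behavior it enables (illustrative only: the function name, signature, and backoff below are assumptions, not the SDK's actual retry code):

```python
import time

from openhands.sdk.llm.exceptions import LLMNoResponseError


def completion_with_adaptive_retry(call, kwargs, num_retries=3, backoff=1.0):
    """Retry `call`, bumping temperature from 0.0 to 1.0 after the first
    LLMNoResponseError so that later attempts are non-deterministic."""
    for attempt in range(num_retries):
        try:
            return call(**kwargs)
        except LLMNoResponseError:
            if attempt + 1 == num_retries:
                raise  # retries exhausted; let the error surface
            if kwargs.get("temperature", 0.0) == 0.0:
                kwargs = {**kwargs, "temperature": 1.0}
            time.sleep(backoff * (attempt + 1))  # simple linear backoff
```

With this change, the malformed-choices path feeds exactly this kind of loop, because the error now surfaces as LLMNoResponseError instead of InternalServerError.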

File tree

openhands-sdk/openhands/sdk/llm/llm.py
tests/sdk/llm/test_api_connection_error_retry.py

2 files changed: +127 −3 lines changed

openhands-sdk/openhands/sdk/llm/llm.py

Lines changed: 16 additions & 1 deletion

```diff
@@ -518,7 +518,22 @@ def _one_attempt(**retry_kwargs) -> ModelResponse:
             self._telemetry.on_request(log_ctx=log_ctx)
             # Merge retry-modified kwargs (like temperature) with call_kwargs
             final_kwargs = {**call_kwargs, **retry_kwargs}
-            resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+            try:
+                resp = self._transport_call(messages=formatted_messages, **final_kwargs)
+            except InternalServerError as e:
+                # litellm sometimes raises InternalServerError when it receives
+                # a malformed response from the provider (e.g., choices=None).
+                # This happens in convert_to_model_response_object when
+                # validating response_object["choices"]. We convert this specific
+                # error to LLMNoResponseError so that temperature bumping is
+                # triggered on retry, which can help avoid the same provider bug.
+                error_msg = str(e).lower()
+                if "convert_to_model_response_object" in error_msg:
+                    raise LLMNoResponseError(
+                        f"Provider returned malformed response: {e}"
+                    ) from e
+                raise
+
             raw_resp: ModelResponse | None = None
             if use_mock_tools:
                 raw_resp = copy.deepcopy(resp)
```
tests/sdk/llm/test_api_connection_error_retry.py

Lines changed: 111 additions & 2 deletions

```diff
@@ -1,12 +1,12 @@
 from unittest.mock import patch
 
 import pytest
-from litellm.exceptions import APIConnectionError
+from litellm.exceptions import APIConnectionError, InternalServerError
 from litellm.types.utils import Choices, Message as LiteLLMMessage, ModelResponse, Usage
 from pydantic import SecretStr
 
 from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent
-from openhands.sdk.llm.exceptions import LLMServiceUnavailableError
+from openhands.sdk.llm.exceptions import LLMNoResponseError, LLMServiceUnavailableError
 
 
 def create_mock_response(content: str = "Test response", response_id: str = "test-id"):
@@ -255,3 +255,112 @@ def retry_listener(attempt: int, max_attempts: int):
         assert isinstance(max_attempts, int)
         assert attempt >= 1
         assert max_attempts == default_config.num_retries
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_retries_with_temperature_bump(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that InternalServerError from convert_to_model_response_object
+    is converted to LLMNoResponseError and retried with temperature bump.
+    """
+    # Ensure we start at 0.0 to trigger bump to 1.0 on retry
+    assert default_config.temperature == 0.0
+
+    mock_litellm_completion.side_effect = [
+        InternalServerError(
+            message=(
+                "Invalid response object Traceback (most recent call last):\n"
+                '  File "litellm/litellm_core_utils/llm_response_utils/'
+                'convert_dict_to_response.py", line 466, in '
+                "convert_to_model_response_object\n"
+                '    assert response_object["choices"] is not None\n'
+                "AssertionError"
+            ),
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        create_mock_response("success"),
+    ]
+
+    response = default_config.completion(
+        messages=[Message(role="user", content=[TextContent(text="hi")])]
+    )
+
+    assert isinstance(response, LLMResponse)
+    assert response.message is not None
+    assert mock_litellm_completion.call_count == 2
+
+    # Verify that on the second call, temperature was bumped to 1.0
+    _, second_kwargs = mock_litellm_completion.call_args_list[1]
+    assert second_kwargs.get("temperature") == 1.0
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_choices_none_exhausts_retries(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that when all retries fail with InternalServerError from
+    convert_to_model_response_object, LLMNoResponseError is raised.
+    """
+    mock_litellm_completion.side_effect = [
+        InternalServerError(
+            message=(
+                "File convert_to_model_response_object: "
+                "assert response_object['choices'] is not None"
+            ),
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        InternalServerError(
+            message=(
+                "File convert_to_model_response_object: "
+                "assert response_object['choices'] is not None"
+            ),
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+    ]
+
+    with pytest.raises(LLMNoResponseError) as excinfo:
+        default_config.completion(
+            messages=[Message(role="user", content=[TextContent(text="hi")])]
+        )
+
+    assert mock_litellm_completion.call_count == default_config.num_retries
+    assert "malformed response" in str(excinfo.value).lower()
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_internal_server_error_unrelated_not_converted(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that unrelated InternalServerError (not about choices) is NOT
+    converted to LLMNoResponseError.
+    """
+    mock_litellm_completion.side_effect = [
+        InternalServerError(
+            message="Database connection failed",
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+        InternalServerError(
+            message="Database connection failed",
+            llm_provider="test_provider",
+            model="test_model",
+        ),
+    ]
+
+    # Should raise InternalServerError (mapped to LLMServiceUnavailableError),
+    # not LLMNoResponseError
+    with pytest.raises(Exception) as excinfo:
+        default_config.completion(
+            messages=[Message(role="user", content=[TextContent(text="hi")])]
+        )
+
+    # Should NOT be LLMNoResponseError
+    assert not isinstance(excinfo.value, LLMNoResponseError)
+    assert mock_litellm_completion.call_count == default_config.num_retries
```
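To run just these cases locally, pytest's -k filter should select them, e.g. `pytest tests/sdk/llm/test_api_connection_error_retry.py -k internal_server_error` (assuming a standard pytest setup for the repo).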
