Commit 53c0162

tests(llm): add explicit retry behavior tests for empty choices (#1107)
Co-authored-by: openhands <openhands@all-hands.dev>
Parent: b24c9cc

2 files changed (+110, -1 lines)

openhands-sdk/openhands/sdk/llm/llm.py

Lines changed: 3 additions & 1 deletion
@@ -531,7 +531,9 @@ def _one_attempt(**retry_kwargs) -> ModelResponse:
         # 6) telemetry
         self._telemetry.on_response(resp, raw_resp=raw_resp)

-        # Ensure at least one choice
+        # Ensure at least one choice.
+        # Gemini sometimes returns empty choices; we raise LLMNoResponseError here
+        # inside the retry boundary so it is retried.
         if not resp.get("choices") or len(resp["choices"]) < 1:
             raise LLMNoResponseError(
                 "Response choices is less than 1. Response: " + str(resp)
Lines changed: 107 additions & 0 deletions (new test file)

@@ -0,0 +1,107 @@
from unittest.mock import patch

import pytest
from litellm.types.utils import Choices, Message as LiteLLMMessage, ModelResponse, Usage
from pydantic import SecretStr

from openhands.sdk.llm import LLM, LLMResponse, Message, TextContent
from openhands.sdk.llm.exceptions import LLMNoResponseError


def create_mock_response(
    content: str = "ok", response_id: str = "r-1"
) -> ModelResponse:
    return ModelResponse(
        id=response_id,
        choices=[
            Choices(
                finish_reason="stop",
                index=0,
                message=LiteLLMMessage(content=content, role="assistant"),
            )
        ],
        created=1,
        model="gpt-4o",
        object="chat.completion",
        system_fingerprint="t",
        usage=Usage(prompt_tokens=1, completion_tokens=1, total_tokens=2),
    )


def create_empty_choices_response(response_id: str = "empty-1") -> ModelResponse:
    return ModelResponse(
        id=response_id,
        choices=[],  # triggers LLMNoResponseError inside retry boundary
        created=1,
        model="gpt-4o",
        object="chat.completion",
        usage=Usage(prompt_tokens=1, completion_tokens=0, total_tokens=1),
    )


@pytest.fixture
def base_llm() -> LLM:
    return LLM(
        usage_id="test-llm",
        model="gpt-4o",
        api_key=SecretStr("test_key"),
        num_retries=2,
        retry_min_wait=1,
        retry_max_wait=2,
    )


@patch("openhands.sdk.llm.llm.litellm_completion")
def test_no_response_retries_then_succeeds(mock_completion, base_llm: LLM) -> None:
    mock_completion.side_effect = [
        create_empty_choices_response("empty-1"),
        create_mock_response("success"),
    ]

    resp = base_llm.completion(
        messages=[Message(role="user", content=[TextContent(text="hi")])]
    )

    assert isinstance(resp, LLMResponse)
    assert resp.message is not None
    assert mock_completion.call_count == 2  # initial + 1 retry


@patch("openhands.sdk.llm.llm.litellm_completion")
def test_no_response_exhausts_retries_bubbles_llm_no_response(
    mock_completion, base_llm: LLM
) -> None:
    # Always return empty choices -> keeps raising LLMNoResponseError inside retry
    mock_completion.side_effect = [
        create_empty_choices_response("empty-1"),
        create_empty_choices_response("empty-2"),
    ]

    with pytest.raises(LLMNoResponseError):
        base_llm.completion(
            messages=[Message(role="user", content=[TextContent(text="hi")])]
        )

    # Tenacity runs function num_retries times total
    assert mock_completion.call_count == base_llm.num_retries


@patch("openhands.sdk.llm.llm.litellm_completion")
def test_no_response_retry_bumps_temperature(mock_completion, base_llm: LLM) -> None:
    # Ensure we start at 0.0 to trigger bump to 1.0 on retry
    assert base_llm.temperature == 0.0

    mock_completion.side_effect = [
        create_empty_choices_response("empty-1"),
        create_mock_response("ok"),
    ]

    base_llm.completion(
        messages=[Message(role="user", content=[TextContent(text="hi")])]
    )

    # Verify that on the second call, temperature was bumped to 1.0 by RetryMixin
    assert mock_completion.call_count == 2
    # Grab kwargs from the second call
    _, second_kwargs = mock_completion.call_args_list[1]
    assert second_kwargs.get("temperature") == 1.0
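The last test pins down one more piece of the retry behavior: when LLMNoResponseError forces a retry, the retry wrapper bumps temperature from 0.0 to 1.0 so the repeated request is less likely to come back with empty choices again. Below is a hypothetical sketch of such a hook, again phrased as a tenacity-style loop rather than the SDK's real RetryMixin; completion_with_retry, call_provider, and the keyword plumbing are made up for illustration.

    # Hypothetical sketch of the temperature bump the tests assert on; the SDK's
    # RetryMixin may implement this differently.
    from tenacity import (
        Retrying,
        retry_if_exception_type,
        stop_after_attempt,
        wait_exponential,
    )

    from openhands.sdk.llm.exceptions import LLMNoResponseError


    def completion_with_retry(call_provider, num_retries: int = 2, **kwargs):
        def bump_temperature(retry_state) -> None:
            # Runs between attempts: raise temperature from 0.0 to 1.0 so the
            # retried request is less likely to come back with empty choices.
            if kwargs.get("temperature", 0.0) == 0.0:
                kwargs["temperature"] = 1.0

        for attempt in Retrying(
            retry=retry_if_exception_type(LLMNoResponseError),
            stop=stop_after_attempt(num_retries),
            wait=wait_exponential(min=1, max=2),
            before_sleep=bump_temperature,
            reraise=True,
        ):
            with attempt:
                resp = call_provider(**kwargs)
                if not resp.get("choices") or len(resp["choices"]) < 1:
                    raise LLMNoResponseError("empty choices")
                return resp

The test only observes this from the outside (the second call to litellm_completion receives temperature=1.0), so any implementation with that observable behavior would satisfy it.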
