From 897175292c0da4989a60b405a0f3eec07859411d Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Tue, 11 Nov 2025 11:13:37 -0500
Subject: [PATCH 1/9] support kimi-k2 extended thinking, fix prompt caching stats, fix max output

---
 openhands-sdk/openhands/sdk/llm/llm.py        | 15 ++++++++-----
 openhands-sdk/openhands/sdk/llm/message.py    | 21 +++++++++++++++++--
 .../openhands/sdk/llm/utils/model_features.py |  8 +++++++
 .../openhands/sdk/llm/utils/telemetry.py      |  4 ++++
 4 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py
index b865553237..5ae72cf02a 100644
--- a/openhands-sdk/openhands/sdk/llm/llm.py
+++ b/openhands-sdk/openhands/sdk/llm/llm.py
@@ -149,7 +149,7 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         description="Approx max chars in each event/content sent to the LLM.",
     )
 
-    temperature: float | None = Field(default=0.0, ge=0)
+    temperature: float | None = Field(default=1.0, ge=0)
     top_p: float | None = Field(default=1.0, ge=0, le=1)
     top_k: float | None = Field(default=None, ge=0)
 
@@ -826,7 +826,12 @@ def _init_model_info_and_caps(self) -> None:
         if self.max_output_tokens is None:
             if any(
                 m in self.model
-                for m in ["claude-3-7-sonnet", "claude-3.7-sonnet", "claude-sonnet-4"]
+                for m in [
+                    "claude-3-7-sonnet",
+                    "claude-3.7-sonnet",
+                    "claude-sonnet-4",
+                    "kimi-k2-thinking",
+                ]
             ):
                 self.max_output_tokens = (
                     64000  # practical cap (litellm may allow 128k with header)
@@ -932,9 +937,9 @@ def format_messages_for_llm(self, messages: list[Message]) -> list[dict]:
             message.cache_enabled = self.is_caching_prompt_active()
             message.vision_enabled = self.vision_is_active()
             message.function_calling_enabled = self.native_tool_calling
-            message.force_string_serializer = get_features(
-                self.model
-            ).force_string_serializer
+            model_features = get_features(self.model)
+            message.force_string_serializer = model_features.force_string_serializer
+            message.send_reasoning_content = model_features.send_reasoning_content
 
         formatted_messages = [message.to_chat_dict() for message in messages]
 
diff --git a/openhands-sdk/openhands/sdk/llm/message.py b/openhands-sdk/openhands/sdk/llm/message.py
index fb70135ac2..d14d3a20a3 100644
--- a/openhands-sdk/openhands/sdk/llm/message.py
+++ b/openhands-sdk/openhands/sdk/llm/message.py
@@ -217,8 +217,21 @@ class Message(BaseModel):
     # - tool execution result (to LLM)
     tool_call_id: str | None = None
     name: str | None = None  # name of the tool
-    # force string serializer
-    force_string_serializer: bool = False
+    force_string_serializer: bool = Field(
+        default=False,
+        description=(
+            "Force using string content serializer when sending to LLM API. "
+            "Useful for providers that do not support list content, "
+            "like HuggingFace and Groq."
+        ),
+    )
+    send_reasoning_content: bool = Field(
+        default=False,
+        description=(
+            "Whether to include the full reasoning content when sending to the LLM. "
+            "Useful for models that support extended reasoning, like Kimi-K2-thinking."
+        ),
+    )
     # reasoning content (from reasoning models like o1, Claude thinking, DeepSeek R1)
     reasoning_content: str | None = Field(
         default=None,
@@ -279,6 +292,10 @@ def to_chat_dict(self) -> dict[str, Any]:
             message_dict["tool_call_id"] = self.tool_call_id
             message_dict["name"] = self.name
 
+        # Required for models like kimi-k2-thinking
+        if self.send_reasoning_content and self.reasoning_content:
+            message_dict["reasoning_content"] = self.reasoning_content
+
         return message_dict
 
     def _string_serializer(self) -> dict[str, Any]:
diff --git a/openhands-sdk/openhands/sdk/llm/utils/model_features.py b/openhands-sdk/openhands/sdk/llm/utils/model_features.py
index ccc1290b37..108794b5f6 100644
--- a/openhands-sdk/openhands/sdk/llm/utils/model_features.py
+++ b/openhands-sdk/openhands/sdk/llm/utils/model_features.py
@@ -23,6 +23,7 @@ class ModelFeatures:
     supports_stop_words: bool
     supports_responses_api: bool
     force_string_serializer: bool
+    send_reasoning_content: bool
 
 
 # Pattern tables capturing current behavior. Keep patterns lowercase.
@@ -99,6 +100,12 @@ class ModelFeatures:
     "groq/kimi-k2-instruct",  # explicit provider-prefixed IDs
 ]
 
+# Models for which we should send the full reasoning content
+# back in the message input
+SEND_REASONING_CONTENT_PATTERNS: list[str] = [
+    "kimi-k2-thinking",
+]
+
 
 def get_features(model: str) -> ModelFeatures:
     """Get model features."""
@@ -111,4 +118,5 @@ def get_features(model: str) -> ModelFeatures:
         ),
         supports_responses_api=model_matches(model, RESPONSES_API_PATTERNS),
         force_string_serializer=model_matches(model, FORCE_STRING_SERIALIZER_PATTERNS),
+        send_reasoning_content=model_matches(model, SEND_REASONING_CONTENT_PATTERNS),
     )
diff --git a/openhands-sdk/openhands/sdk/llm/utils/telemetry.py b/openhands-sdk/openhands/sdk/llm/utils/telemetry.py
index 7f0707602f..2e6b1ac785 100644
--- a/openhands-sdk/openhands/sdk/llm/utils/telemetry.py
+++ b/openhands-sdk/openhands/sdk/llm/utils/telemetry.py
@@ -149,6 +149,10 @@ def _record_usage(
         if p_details is not None:
             cache_read = int(getattr(p_details, "cached_tokens", 0) or 0)
 
+        # Kimi-K2-thinking populates the usage.cached_tokens field
+        if not cache_read and hasattr(usage, "cached_tokens"):
+            cache_read = int(getattr(usage, "cached_tokens", 0) or 0)
+
         reasoning_tokens = 0
         c_details = getattr(usage, "completion_tokens_details", None) or getattr(
             usage, "output_tokens_details", None

From 06bf2ff40baf9294f2f4ec237beb213b952a2fa7 Mon Sep 17 00:00:00 2001
From: openhands
Date: Tue, 11 Nov 2025 16:27:06 +0000
Subject: [PATCH 2/9] Add unit tests for send_reasoning_content functionality

- Test that kimi-k2-thinking model has send_reasoning_content=True
- Test that reasoning_content is included when send_reasoning_content is True
- Test that reasoning_content is excluded when send_reasoning_content is False
- Test edge cases: None, empty string, and list serializer scenarios

Co-authored-by: openhands
---
 tests/sdk/llm/test_message.py        | 103 +++++++++++++++++++++++++++
 tests/sdk/llm/test_model_features.py |  20 ++++++
 2 files changed, 123 insertions(+)

diff --git a/tests/sdk/llm/test_message.py b/tests/sdk/llm/test_message.py
index 2cdb1cd4ec..d87e1e924c 100644
--- a/tests/sdk/llm/test_message.py
+++ b/tests/sdk/llm/test_message.py
@@ -267,3 +267,106 @@ def test_text_content_truncation_exact_limit():
     # Check that text was not truncated
     assert len(result) == 1
     assert result[0]["text"] == exact_text
+
+
+def test_message_with_reasoning_content_when_enabled():
+    """Test that reasoning_content is included when send_reasoning_content is True."""
+    from openhands.sdk.llm.message import Message, TextContent
+
+    message = Message(
+        role="assistant",
+        content=[TextContent(text="Final answer")],
+        reasoning_content="Let me think step by step...",
+        send_reasoning_content=True,
+    )
+
+    result = message.to_chat_dict()
+    assert result["role"] == "assistant"
+    assert result["content"] == "Final answer"
+    assert result["reasoning_content"] == "Let me think step by step..."
+
+
+def test_message_with_reasoning_content_when_disabled():
+    """Test that reasoning_content is NOT included when send_reasoning_content is False."""  # noqa: E501
+    from openhands.sdk.llm.message import Message, TextContent
+
+    message = Message(
+        role="assistant",
+        content=[TextContent(text="Final answer")],
+        reasoning_content="Let me think step by step...",
+        send_reasoning_content=False,
+    )
+
+    result = message.to_chat_dict()
+    assert result["role"] == "assistant"
+    assert result["content"] == "Final answer"
+    assert "reasoning_content" not in result
+
+
+def test_message_with_reasoning_content_default_disabled():
+    """Test that reasoning_content is NOT included by default."""
+    from openhands.sdk.llm.message import Message, TextContent
+
+    message = Message(
+        role="assistant",
+        content=[TextContent(text="Final answer")],
+        reasoning_content="Let me think step by step...",
+    )
+
+    result = message.to_chat_dict()
+    assert result["role"] == "assistant"
+    assert result["content"] == "Final answer"
+    assert "reasoning_content" not in result
+
+
+def test_message_with_reasoning_content_none():
+    """Test that reasoning_content is NOT included when it's None even if enabled."""
+    from openhands.sdk.llm.message import Message, TextContent
+
+    message = Message(
+        role="assistant",
+        content=[TextContent(text="Final answer")],
+        reasoning_content=None,
+        send_reasoning_content=True,
+    )
+
+    result = message.to_chat_dict()
+    assert result["role"] == "assistant"
+    assert result["content"] == "Final answer"
+    assert "reasoning_content" not in result
+
+
+def test_message_with_reasoning_content_empty_string():
+    """Test that reasoning_content is NOT included when it's an empty string."""
+    from openhands.sdk.llm.message import Message, TextContent
+
+    message = Message(
+        role="assistant",
+        content=[TextContent(text="Final answer")],
+        reasoning_content="",
+        send_reasoning_content=True,
+    )
+
+    result = message.to_chat_dict()
+    assert result["role"] == "assistant"
+    assert result["content"] == "Final answer"
+    assert "reasoning_content" not in result
+
+
+def test_message_with_reasoning_content_list_serializer():
+    """Test that reasoning_content works with list serializer."""
+    from openhands.sdk.llm.message import Message, TextContent
+
+    message = Message(
+        role="assistant",
+        content=[TextContent(text="Final answer")],
+        reasoning_content="Step by step reasoning",
+        send_reasoning_content=True,
+        function_calling_enabled=True,  # Forces list serializer
+    )
+
+    result = message.to_chat_dict()
+    assert result["role"] == "assistant"
+    assert isinstance(result["content"], list)
+    assert result["content"][0]["text"] == "Final answer"
+    assert result["reasoning_content"] == "Step by step reasoning"
diff --git a/tests/sdk/llm/test_model_features.py b/tests/sdk/llm/test_model_features.py
index 7a6c424c96..a5714ed769 100644
--- a/tests/sdk/llm/test_model_features.py
+++ b/tests/sdk/llm/test_model_features.py
@@ -240,3 +240,23 @@ def test_force_string_serializer_full_model_names():
     assert get_features("Kimi K2-Instruct-0905").force_string_serializer is False
     # Groq-prefixed Kimi should force string serializer
     assert get_features("groq/kimi-k2-instruct-0905").force_string_serializer is True
+
+
+@pytest.mark.parametrize(
+    "model,expected_send_reasoning",
+    [
+        ("kimi-k2-thinking", True),
+        ("kimi-k2-thinking-0905", True),
+        ("Kimi-K2-Thinking", True),  # Case insensitive
+        ("moonshot/kimi-k2-thinking", True),  # With provider prefix
+        ("kimi-k2-instruct", False),  # Different variant
+        ("gpt-4o", False),
+        ("claude-3-5-sonnet", False),
+        ("o1", False),
+        ("unknown-model", False),
+    ],
+)
+def test_send_reasoning_content_support(model, expected_send_reasoning):
+    """Test that models like kimi-k2-thinking require send_reasoning_content."""
+    features = get_features(model)
+    assert features.send_reasoning_content is expected_send_reasoning

From a9b74bb07680cfba6d577f615594d297318127f0 Mon Sep 17 00:00:00 2001
From: openhands
Date: Tue, 11 Nov 2025 16:35:37 +0000
Subject: [PATCH 3/9] Fix tests affected by default temperature change from 0.0 to 1.0

Updated test expectations to match the new default temperature of 1.0:
- test_llm_config_defaults: Update default temperature assertion
- test_llm_local_detection_based_on_model_name: Update temperature check
- test_no_response_retry_bumps_temperature: Explicitly set temperature=0.0
  in fixture to properly test the temperature bump behavior on retry

Co-authored-by: openhands
---
 tests/sdk/config/test_llm_config.py         | 2 +-
 tests/sdk/llm/test_llm.py                   | 2 +-
 tests/sdk/llm/test_llm_no_response_retry.py | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/sdk/config/test_llm_config.py b/tests/sdk/config/test_llm_config.py
index 46a08c8126..1f0427bf33 100644
--- a/tests/sdk/config/test_llm_config.py
+++ b/tests/sdk/config/test_llm_config.py
@@ -20,7 +20,7 @@ def test_llm_config_defaults():
     assert config.retry_max_wait == 64
     assert config.timeout is None
     assert config.max_message_chars == 30_000
-    assert config.temperature == 0.0
+    assert config.temperature == 1.0
     assert config.top_p == 1.0
     assert config.top_k is None
     assert config.custom_llm_provider is None
diff --git a/tests/sdk/llm/test_llm.py b/tests/sdk/llm/test_llm.py
index e5a2eef089..5440ce3284 100644
--- a/tests/sdk/llm/test_llm.py
+++ b/tests/sdk/llm/test_llm.py
@@ -534,7 +534,7 @@ def test_llm_local_detection_based_on_model_name(default_llm):
 
     # Test basic model configuration
     assert llm.model == "gpt-4o"
-    assert llm.temperature == 0.0
+    assert llm.temperature == 1.0
 
     # Test with localhost base_url
     local_llm = default_llm.model_copy(update={"base_url": "http://localhost:8000"})
diff --git a/tests/sdk/llm/test_llm_no_response_retry.py b/tests/sdk/llm/test_llm_no_response_retry.py
index a14e6ecaea..c8f5809554 100644
--- a/tests/sdk/llm/test_llm_no_response_retry.py
+++ b/tests/sdk/llm/test_llm_no_response_retry.py
@@ -48,6 +48,7 @@ def base_llm() -> LLM:
         num_retries=2,
         retry_min_wait=1,
         retry_max_wait=2,
+        temperature=0.0,  # Explicitly set to test temperature bump behavior
     )
 
 

From 2a854e55c3db8f755ca1e3e04f084ae33e4e4d3e Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Tue, 11 Nov 2025 11:42:13 -0500
Subject: [PATCH 4/9] revert temperature change

---
 openhands-sdk/openhands/sdk/llm/llm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py
index 5ae72cf02a..b80b7ba961 100644
--- a/openhands-sdk/openhands/sdk/llm/llm.py
+++ b/openhands-sdk/openhands/sdk/llm/llm.py
@@ -149,7 +149,7 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         description="Approx max chars in each event/content sent to the LLM.",
     )
 
-    temperature: float | None = Field(default=1.0, ge=0)
+    temperature: float | None = Field(default=0.0, ge=0)
     top_p: float | None = Field(default=1.0, ge=0, le=1)
     top_k: float | None = Field(default=None, ge=0)
 

From ac82e139d10cec47e52b4ab6f6491ae968e13d22 Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Tue, 11 Nov 2025 11:47:26 -0500
Subject: [PATCH 5/9] simplify verified model

---
 .../openhands/sdk/llm/utils/verified_models.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/llm/utils/verified_models.py b/openhands-sdk/openhands/sdk/llm/utils/verified_models.py
index c539f36026..b1b53eb1e4 100644
--- a/openhands-sdk/openhands/sdk/llm/utils/verified_models.py
+++ b/openhands-sdk/openhands/sdk/llm/utils/verified_models.py
@@ -37,19 +37,13 @@
 VERIFIED_OPENHANDS_MODELS = [
     "claude-sonnet-4-5-20250929",
     "claude-haiku-4-5-20251001",
-    "gpt-5-codex",
     "gpt-5-2025-08-07",
+    "gpt-5-codex",
+    "kimi-k2-thinking",
     "gpt-5-mini-2025-08-07",
-    "claude-sonnet-4-20250514",
-    "claude-opus-4-20250514",
     "claude-opus-4-1-20250805",
     "devstral-small-2507",
     "devstral-medium-2507",
-    "o3",
-    "o4-mini",
-    "gemini-2.5-pro",
-    "kimi-k2-0711-preview",
-    "qwen3-coder-480b",
 ]
 
 

From 391cc5b3c3a6f87ccb7558f74c3b1811656e594d Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Tue, 11 Nov 2025 11:53:27 -0500
Subject: [PATCH 6/9] set model-specific default temperature

---
 openhands-sdk/openhands/sdk/llm/llm.py        | 17 +++++++++++++---
 .../openhands/sdk/llm/utils/model_features.py | 20 +++++++++++++++++++
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/openhands-sdk/openhands/sdk/llm/llm.py b/openhands-sdk/openhands/sdk/llm/llm.py
index b80b7ba961..c30e7a54e4 100644
--- a/openhands-sdk/openhands/sdk/llm/llm.py
+++ b/openhands-sdk/openhands/sdk/llm/llm.py
@@ -73,7 +73,7 @@
 from openhands.sdk.llm.options.chat_options import select_chat_options
 from openhands.sdk.llm.options.responses_options import select_responses_options
 from openhands.sdk.llm.utils.metrics import Metrics, MetricsSnapshot
-from openhands.sdk.llm.utils.model_features import get_features
+from openhands.sdk.llm.utils.model_features import get_default_temperature, get_features
 from openhands.sdk.llm.utils.retry_mixin import RetryMixin
 from openhands.sdk.llm.utils.telemetry import Telemetry
 from openhands.sdk.logger import ENV_LOG_DIR, get_logger
@@ -149,7 +149,14 @@ class LLM(BaseModel, RetryMixin, NonNativeToolCallingMixin):
         description="Approx max chars in each event/content sent to the LLM.",
     )
 
-    temperature: float | None = Field(default=0.0, ge=0)
+    temperature: float | None = Field(
+        default=None,
+        ge=0,
+        description=(
+            "Sampling temperature for response generation. "
+            "Defaults to 0 for most models and provider default for reasoning models."
+        ),
+    )
     top_p: float | None = Field(default=1.0, ge=0, le=1)
     top_k: float | None = Field(default=None, ge=0)
 
@@ -375,9 +382,13 @@ def _set_env_side_effects(self):
         # Capabilities + model info
         self._init_model_info_and_caps()
 
+        if self.temperature is None:
+            self.temperature = get_default_temperature(self.model)
+
         logger.debug(
             f"LLM ready: model={self.model} base_url={self.base_url} "
-            f"reasoning_effort={self.reasoning_effort}"
+            f"reasoning_effort={self.reasoning_effort} "
+            f"temperature={self.temperature}"
         )
         return self
 
diff --git a/openhands-sdk/openhands/sdk/llm/utils/model_features.py b/openhands-sdk/openhands/sdk/llm/utils/model_features.py
index 108794b5f6..352a913b4f 100644
--- a/openhands-sdk/openhands/sdk/llm/utils/model_features.py
+++ b/openhands-sdk/openhands/sdk/llm/utils/model_features.py
@@ -120,3 +120,23 @@ def get_features(model: str) -> ModelFeatures:
         force_string_serializer=model_matches(model, FORCE_STRING_SERIALIZER_PATTERNS),
         send_reasoning_content=model_matches(model, SEND_REASONING_CONTENT_PATTERNS),
     )
+
+
+# Default temperature mapping.
+# Each entry: (pattern, default_temperature)
+# The last pattern "*" acts as a wildcard fallback.
+DEFAULT_TEMPERATURE_PATTERNS: list[tuple[str, float]] = [
+    ("kimi-k2-thinking", 1.0),
+]
+
+
+def get_default_temperature(model: str) -> float:
+    """Return the default temperature for a given model pattern.
+
+    Uses case-insensitive substring matching via model_matches.
+    The last entry with '*' is treated as a wildcard fallback.
+    """
+    for pattern, value in DEFAULT_TEMPERATURE_PATTERNS:
+        if model_matches(model, [pattern]):
+            return value
+    return 0.0

From 868e4beda208f1d305969231ed3f01e6e252a387 Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Wed, 12 Nov 2025 00:54:35 +0800
Subject: [PATCH 7/9] Apply suggestion from @xingyaoww

---
 openhands-sdk/openhands/sdk/llm/utils/model_features.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/openhands-sdk/openhands/sdk/llm/utils/model_features.py b/openhands-sdk/openhands/sdk/llm/utils/model_features.py
index 352a913b4f..d565e0dec9 100644
--- a/openhands-sdk/openhands/sdk/llm/utils/model_features.py
+++ b/openhands-sdk/openhands/sdk/llm/utils/model_features.py
@@ -124,7 +124,6 @@ def get_features(model: str) -> ModelFeatures:
 
 # Default temperature mapping.
 # Each entry: (pattern, default_temperature)
-# The last pattern "*" acts as a wildcard fallback.
 DEFAULT_TEMPERATURE_PATTERNS: list[tuple[str, float]] = [
     ("kimi-k2-thinking", 1.0),
 ]

From e158c9b04d03edab1574c39d206b0133b83fb42a Mon Sep 17 00:00:00 2001
From: openhands
Date: Tue, 11 Nov 2025 17:05:33 +0000
Subject: [PATCH 8/9] Fix tests for default temperature implementation

- Update test_llm_config_defaults to expect temperature=0.0
- Update test_llm_local_detection_based_on_model_name to expect
  temperature=0.0
- Fix mock_llm fixture in condenser tests to include temperature attribute
- Add comprehensive tests for get_default_temperature function:
  * Test kimi-k2-thinking models default to 1.0
  * Test all other models default to 0.0
  * Test with provider prefixes (moonshot/, litellm_proxy/)
  * Test case insensitivity
  * Test fallback for unknown models

All previously failing tests now pass.
Co-authored-by: openhands
---
 tests/sdk/config/test_llm_config.py           |  2 +-
 .../test_llm_summarizing_condenser.py         |  1 +
 tests/sdk/llm/test_llm.py                     |  2 +-
 tests/sdk/llm/test_model_features.py          | 49 +++++++++++++++++
 4 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/tests/sdk/config/test_llm_config.py b/tests/sdk/config/test_llm_config.py
index 1f0427bf33..46a08c8126 100644
--- a/tests/sdk/config/test_llm_config.py
+++ b/tests/sdk/config/test_llm_config.py
@@ -20,7 +20,7 @@ def test_llm_config_defaults():
     assert config.retry_max_wait == 64
     assert config.timeout is None
     assert config.max_message_chars == 30_000
-    assert config.temperature == 1.0
+    assert config.temperature == 0.0
     assert config.top_p == 1.0
     assert config.top_k is None
     assert config.custom_llm_provider is None
diff --git a/tests/sdk/context/condenser/test_llm_summarizing_condenser.py b/tests/sdk/context/condenser/test_llm_summarizing_condenser.py
index 8b8f344b05..18e1c02afa 100644
--- a/tests/sdk/context/condenser/test_llm_summarizing_condenser.py
+++ b/tests/sdk/context/condenser/test_llm_summarizing_condenser.py
@@ -65,6 +65,7 @@ def create_completion_result(content: str) -> LLMResponse:
     mock_llm.base_url = None
     mock_llm.reasoning_effort = None
     mock_llm.litellm_extra_body = {}
+    mock_llm.temperature = 0.0
 
     # Explicitly set pricing attributes required by LLM -> Telemetry wiring
     mock_llm.input_cost_per_token = None
diff --git a/tests/sdk/llm/test_llm.py b/tests/sdk/llm/test_llm.py
index 5440ce3284..e5a2eef089 100644
--- a/tests/sdk/llm/test_llm.py
+++ b/tests/sdk/llm/test_llm.py
@@ -534,7 +534,7 @@ def test_llm_local_detection_based_on_model_name(default_llm):
 
     # Test basic model configuration
     assert llm.model == "gpt-4o"
-    assert llm.temperature == 1.0
+    assert llm.temperature == 0.0
 
     # Test with localhost base_url
     local_llm = default_llm.model_copy(update={"base_url": "http://localhost:8000"})
diff --git a/tests/sdk/llm/test_model_features.py b/tests/sdk/llm/test_model_features.py
index a5714ed769..6f17e205f3 100644
--- a/tests/sdk/llm/test_model_features.py
+++ b/tests/sdk/llm/test_model_features.py
@@ -1,6 +1,7 @@
 import pytest
 
 from openhands.sdk.llm.utils.model_features import (
+    get_default_temperature,
     get_features,
     model_matches,
 )
@@ -260,3 +261,51 @@ def test_send_reasoning_content_support(model, expected_send_reasoning):
     """Test that models like kimi-k2-thinking require send_reasoning_content."""
     features = get_features(model)
     assert features.send_reasoning_content is expected_send_reasoning
+
+
+@pytest.mark.parametrize(
+    "model,expected_temperature",
+    [
+        # kimi-k2-thinking models should default to 1.0
+        ("kimi-k2-thinking", 1.0),
+        ("kimi-k2-thinking-0905", 1.0),
+        ("Kimi-K2-Thinking", 1.0),  # Case insensitive
+        ("moonshot/kimi-k2-thinking", 1.0),  # With provider prefix
+        ("litellm_proxy/kimi-k2-thinking", 1.0),  # With litellm proxy prefix
+        # All other models should default to 0.0
+        ("kimi-k2-instruct", 0.0),  # Different kimi variant
+        ("gpt-4", 0.0),
+        ("gpt-4o", 0.0),
+        ("gpt-4o-mini", 0.0),
+        ("claude-3-5-sonnet", 0.0),
+        ("claude-3-7-sonnet", 0.0),
+        ("gemini-1.5-pro", 0.0),
+        ("gemini-2.5-pro-experimental", 0.0),
+        ("o1", 0.0),
+        ("o1-mini", 0.0),
+        ("o3", 0.0),
+        ("deepseek-chat", 0.0),
+        ("llama-3.1-70b", 0.0),
+        ("azure/gpt-4", 0.0),
+        ("openai/gpt-4o", 0.0),
+        ("anthropic/claude-3-5-sonnet", 0.0),
+        ("unknown-model", 0.0),
+    ],
+)
+def test_get_default_temperature(model, expected_temperature):
+    """Test that get_default_temperature returns correct values for different models."""
+    assert get_default_temperature(model) == expected_temperature
+
+
+def test_get_default_temperature_fallback():
+    """Test that get_default_temperature returns 0.0 for unknown models."""
+    assert get_default_temperature("completely-unknown-model-12345") == 0.0
+    assert get_default_temperature("some-random-model") == 0.0
+
+
+def test_get_default_temperature_case_insensitive():
+    """Test that get_default_temperature is case insensitive."""
+    assert get_default_temperature("kimi-k2-thinking") == 1.0
+    assert get_default_temperature("KIMI-K2-THINKING") == 1.0
+    assert get_default_temperature("Kimi-K2-Thinking") == 1.0
+    assert get_default_temperature("KiMi-k2-ThInKiNg") == 1.0

From eb6e7973d2f0ce89c15f2b183ec4513ea8f978da Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Wed, 12 Nov 2025 01:23:24 +0800
Subject: [PATCH 9/9] Apply suggestion from @xingyaoww

---
 openhands-sdk/openhands/sdk/llm/utils/model_features.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/openhands-sdk/openhands/sdk/llm/utils/model_features.py b/openhands-sdk/openhands/sdk/llm/utils/model_features.py
index d565e0dec9..00056706cf 100644
--- a/openhands-sdk/openhands/sdk/llm/utils/model_features.py
+++ b/openhands-sdk/openhands/sdk/llm/utils/model_features.py
@@ -133,7 +133,6 @@ def get_default_temperature(model: str) -> float:
     """Return the default temperature for a given model pattern.
 
     Uses case-insensitive substring matching via model_matches.
-    The last entry with '*' is treated as a wildcard fallback.
     """
     for pattern, value in DEFAULT_TEMPERATURE_PATTERNS:
        if model_matches(model, [pattern]):