
Commit f16a2da

Add remaining LLM fixes and LLM registry (#70)
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
Co-authored-by: Xingyao Wang <xingyaoww@gmail.com>

1 parent: 67364a2

File tree

9 files changed (+499, -53 lines)


.github/workflows/precommit.yml

Lines changed: 3 additions & 1 deletion
@@ -3,7 +3,9 @@ name: Pre-commit checks
 
 on:
   push:
-    branches: ["**"] # all branches
+    branches: ["main"]
+  pull_request:
+    branches: ["**"]
 
 jobs:
   pre-commit:
Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@

import os

from pydantic import SecretStr

from openhands.core import (
    CodeActAgent,
    Conversation,
    EventType,
    LLMConfig,
    LLMConvertibleEvent,
    LLMRegistry,
    Message,
    TextContent,
    Tool,
    get_logger,
)
from openhands.tools import (
    BashExecutor,
    FileEditorExecutor,
    execute_bash_tool,
    str_replace_editor_tool,
)


logger = get_logger(__name__)

# Configure LLM using LLMRegistry
api_key = os.getenv("LITELLM_API_KEY")
assert api_key is not None, "LITELLM_API_KEY environment variable is not set."

# Create LLM registry
llm_registry = LLMRegistry()

# Get LLM from registry (this will create and cache the LLM)
llm_config = LLMConfig(
    model="litellm_proxy/anthropic/claude-sonnet-4-20250514",
    base_url="https://llm-proxy.eval.all-hands.dev",
    api_key=SecretStr(api_key),
)
llm = llm_registry.get_llm(service_id="main_agent", config=llm_config)

# Tools
cwd = os.getcwd()
bash = BashExecutor(working_dir=cwd)
file_editor = FileEditorExecutor()
tools: list[Tool] = [
    execute_bash_tool.set_executor(executor=bash),
    str_replace_editor_tool.set_executor(executor=file_editor),
]

# Agent
agent = CodeActAgent(llm=llm, tools=tools)

llm_messages = []  # collect raw LLM messages


def conversation_callback(event: EventType):
    logger.info(f"Found a conversation message: {str(event)[:200]}...")
    if isinstance(event, LLMConvertibleEvent):
        llm_messages.append(event.to_llm_message())


conversation = Conversation(agent=agent, callbacks=[conversation_callback])

conversation.send_message(
    message=Message(
        role="user",
        content=[
            TextContent(
                text="Hello! Can you create a new Python file named "
                "hello_registry.py that prints 'Hello from LLM Registry!'?"
            )
        ],
    )
)
conversation.run()

print("=" * 100)
print("Conversation finished. Got the following LLM messages:")
for i, message in enumerate(llm_messages):
    print(f"Message {i}: {str(message)[:200]}")

print("=" * 100)
print(f"LLM Registry services: {llm_registry.list_services()}")

# Demonstrate getting the same LLM instance from registry
same_llm = llm_registry.get_llm(service_id="main_agent", config=llm_config)
print(f"Same LLM instance: {llm is same_llm}")

# Demonstrate requesting a completion directly from registry
completion_response = llm_registry.request_extraneous_completion(
    service_id="completion_service",
    llm_config=llm_config,
    messages=[{"role": "user", "content": "Say hello in one word."}],
)
print(f"Direct completion response: {completion_response}")

openhands/core/__init__.py

Lines changed: 3 additions & 1 deletion
@@ -4,7 +4,7 @@
 from .config import LLMConfig, MCPConfig
 from .conversation import Conversation, ConversationCallbackType
 from .event import EventBase, EventType, LLMConvertibleEvent
-from .llm import LLM, ImageContent, Message, TextContent
+from .llm import LLM, ImageContent, LLMRegistry, Message, RegistryEvent, TextContent
 from .logger import get_logger
 from .tool import ActionBase, ObservationBase, Tool
 
@@ -16,6 +16,8 @@
 
 __all__ = [
     "LLM",
+    "LLMRegistry",
+    "RegistryEvent",
     "Message",
     "TextContent",
     "ImageContent",

openhands/core/agent/codeact_agent/codeact_agent.py

Lines changed: 4 additions & 3 deletions
@@ -87,10 +87,11 @@ def step(
             list[LLMConvertibleEvent],
             [e for e in state.events if isinstance(e, LLMConvertibleEvent)],
         )
-        _messages = self.llm.format_messages_for_llm(
-            LLMConvertibleEvent.events_to_messages(llm_convertible_events)
+        _messages = LLMConvertibleEvent.events_to_messages(llm_convertible_events)
+        logger.debug(
+            "Sending messages to LLM: "
+            f"{json.dumps([m.model_dump() for m in _messages], indent=2)}"
         )
-        logger.debug(f"Sending messages to LLM: {json.dumps(_messages, indent=2)}")
         response: ModelResponse = self.llm.completion(
             messages=_messages,
             tools=[tool.to_openai_tool() for tool in self.tools.values()],
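
The debug log changes because `_messages` is now a list of pydantic Message objects rather than plain dicts, and pydantic models are not directly JSON-serializable. A small standalone sketch of that constraint; FakeMessage is an illustrative stand-in, not the real Message class:

import json

from pydantic import BaseModel


class FakeMessage(BaseModel):  # stand-in for openhands' Message, illustration only
    role: str
    content: str


msgs = [FakeMessage(role="user", content="hi")]
# json.dumps(msgs) would raise TypeError; dumping each model to a dict works:
print(json.dumps([m.model_dump() for m in msgs], indent=2))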

openhands/core/config/llm_config.py

Lines changed: 48 additions & 23 deletions
@@ -1,16 +1,15 @@
 import os
-from typing import Any
 
-from pydantic import BaseModel, ConfigDict, Field, SecretStr
+from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
 
 from openhands.core.logger import ENV_LOG_DIR, get_logger
 
 
 logger = get_logger(__name__)
 
 
-class LLMConfig(BaseModel):
-    """Configuration for the LLM model.
+class LLMConfig(BaseModel, frozen=True):
+    """Immutable configuration for the LLM model.
 
     Attributes:
         model: The model to use.

@@ -99,32 +98,50 @@ class LLMConfig(BaseModel):
 
     model_config = ConfigDict(extra="forbid")
 
-    def model_post_init(self, __context: Any) -> None:
-        """Post-initialization hook to assign OpenRouter-related variables to
-        environment variables.
+    # 1) Pre-validation: transform inputs for a frozen model
+    @model_validator(mode="before")
+    @classmethod
+    def _coerce_inputs(cls, data):
+        # data can be dict or BaseModel – normalize to dict
+        if not isinstance(data, dict):
+            return data
+        d = dict(data)
 
-        This ensures that these values are accessible to litellm at runtime.
-        """
-        super().model_post_init(__context)
+        model_val = d.get("model", None)
+        if model_val is None:
+            raise ValueError("model must be specified in LLMConfig")
 
-        # Assign OpenRouter-specific variables to environment variables
+        # reasoning_effort default (unless Gemini)
+        if d.get("reasoning_effort") is None and "gemini-2.5-pro" not in model_val:
+            d["reasoning_effort"] = "high"
+
+        # Azure default api_version
+        if model_val.startswith("azure") and not d.get("api_version"):
+            d["api_version"] = "2024-12-01-preview"
+
+        # Provider rewrite: openhands/* -> litellm_proxy/*
+        if model_val.startswith("openhands/"):
+            model_name = model_val.removeprefix("openhands/")
+            d["model"] = f"litellm_proxy/{model_name}"
+            # don't overwrite if caller explicitly set base_url
+            d.setdefault("base_url", "https://llm-proxy.app.all-hands.dev/")
+
+        # HF doesn't support the OpenAI default value for top_p (1)
+        if model_val.startswith("huggingface"):
+            logger.debug(f"Setting top_p to 0.9 for Hugging Face model: {model_val}")
+            _cur_top_p = d.get("top_p", 1.0)
+            d["top_p"] = 0.9 if _cur_top_p == 1 else _cur_top_p
+
+        return d
+
+    # 2) Post-validation: side effects only; must return self
+    @model_validator(mode="after")
+    def _set_env_side_effects(self):
         if self.openrouter_site_url:
             os.environ["OR_SITE_URL"] = self.openrouter_site_url
         if self.openrouter_app_name:
             os.environ["OR_APP_NAME"] = self.openrouter_app_name
 
-        # Set reasoning_effort to 'high' by default for non-Gemini models
-        # Gemini models use optimized thinking budget when reasoning_effort is None
-        if self.reasoning_effort is None and "gemini-2.5-pro" not in self.model:
-            self.reasoning_effort = "high"
-
-        # Set an API version by default for Azure models
-        # Required for newer models.
-        # Azure issue: https://github.com/All-Hands-AI/OpenHands/issues/7755
-        if self.model.startswith("azure") and self.api_version is None:
-            self.api_version = "2024-12-01-preview"
-
-        # Set AWS credentials as environment variables for LiteLLM Bedrock
         if self.aws_access_key_id:
             os.environ["AWS_ACCESS_KEY_ID"] = self.aws_access_key_id.get_secret_value()
         if self.aws_secret_access_key:

@@ -133,3 +150,11 @@ def model_post_init(self, __context: Any) -> None:
             )
         if self.aws_region_name:
             os.environ["AWS_REGION_NAME"] = self.aws_region_name
+
+        logger.debug(
+            f"LLMConfig finalized with model={self.model} "
+            f"base_url={self.base_url} "
+            f"api_version={self.api_version} "
+            f"reasoning_effort={self.reasoning_effort}",
+        )
+        return self
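
Since LLMConfig is now frozen, defaults can no longer be assigned to `self` in a post-init hook; they have to be injected into the raw input before validation, which is what the mode="before" validator does. Here is a self-contained sketch of the same pattern using a made-up two-field config, purely for illustration:

from pydantic import BaseModel, model_validator


class FrozenCfg(BaseModel, frozen=True):
    model: str
    reasoning_effort: str | None = None

    @model_validator(mode="before")
    @classmethod
    def _fill_defaults(cls, data):
        # Mutate the raw input dict, not the soon-to-be-immutable instance.
        if isinstance(data, dict) and data.get("reasoning_effort") is None:
            data = {**data, "reasoning_effort": "high"}
        return data

    @model_validator(mode="after")
    def _log(self):
        # After validation the instance is frozen; side effects only, then return self.
        return self


cfg = FrozenCfg(model="gpt-4o")  # illustrative model name
print(cfg.reasoning_effort)  # "high"
# cfg.reasoning_effort = "low" would raise a ValidationError (frozen instance)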

openhands/core/llm/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -1,10 +1,13 @@
 from .llm import LLM
+from .llm_registry import LLMRegistry, RegistryEvent
 from .message import ImageContent, Message, TextContent
 from .metadata import get_llm_metadata
 
 
 __all__ = [
     "LLM",
+    "LLMRegistry",
+    "RegistryEvent",
     "Message",
     "TextContent",
     "ImageContent",

openhands/core/llm/llm.py

Lines changed: 31 additions & 25 deletions
@@ -3,7 +3,7 @@
 import time
 import warnings
 from functools import partial
-from typing import Any, Callable, TypeGuard
+from typing import Any, Callable, TypeGuard, cast
 
 import httpx
 

@@ -100,6 +100,8 @@ def __init__(
 
         self.model_info: ModelInfo | None = None
         self._function_calling_active: bool = False
+        self._max_input_tokens: int | None = self.config.max_input_tokens
+        self._max_output_tokens: int | None = self.config.max_output_tokens
         self.retry_listener = retry_listener
         if self.config.log_completions:
             if self.config.log_completions_folder is None:

@@ -139,15 +141,6 @@ def __init__(
         # openai doesn't expose top_p, but litellm does
         kwargs["top_p"] = self.config.top_p
 
-        # Handle OpenHands provider - rewrite to litellm_proxy
-        if self.config.model.startswith("openhands/"):
-            model_name = self.config.model.removeprefix("openhands/")
-            self.config.model = f"litellm_proxy/{model_name}"
-            self.config.base_url = "https://llm-proxy.app.all-hands.dev/"
-            logger.debug(
-                f"Rewrote openhands/{model_name} to {self.config.model} with base URL {self.config.base_url}"  # noqa: E501
-            )
-
         features = get_features(self.config.model)
         if features.supports_reasoning_effort:
             # For Gemini models, only map 'low' to optimized thinking budget

@@ -229,7 +222,9 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
             if "stream" in kwargs and kwargs["stream"]:
                 raise ValueError("Streaming is not supported in LLM class.")
 
-            messages_kwarg: list[dict[str, Any]] | dict[str, Any] = []
+            messages_kwarg: (
+                dict[str, Any] | Message | list[dict[str, Any]] | list[Message]
+            ) = []
             mock_function_calling = not self.is_function_calling_active()
 
             # some callers might send the model and messages directly

@@ -248,9 +243,19 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
                 messages_kwarg = kwargs["messages"]
 
             # ensure we work with a list of messages
-            messages: list[dict[str, Any]] = (
+            messages_list = (
                 messages_kwarg if isinstance(messages_kwarg, list) else [messages_kwarg]
             )
+            # format Message objects to dict if needed
+            messages: list[dict] = []
+            if messages_list and isinstance(messages_list[0], Message):
+                messages = self.format_messages_for_llm(
+                    cast(list[Message], messages_list)
+                )
+            else:
+                messages = cast(list[dict[str, Any]], messages_list)
+
+            kwargs["messages"] = messages
 
             # handle conversion of to non-function calling messages if needed
             original_fncall_messages = copy.deepcopy(messages)

@@ -408,6 +413,14 @@ def _all_choices(
 
         self._completion = wrapper
 
+    @property
+    def max_input_tokens(self) -> int | None:
+        return self._max_input_tokens
+
+    @property
+    def max_output_tokens(self) -> int | None:
+        return self._max_output_tokens
+
     @property
     def completion(self) -> Callable:
         """Decorator for the litellm completion function.

@@ -483,41 +496,34 @@ def init_model_info(self) -> None:
                 f"Model info: {json.dumps({'model': self.config.model, 'base_url': self.config.base_url}, indent=2)}"  # noqa: E501
             )
 
-        if self.config.model.startswith("huggingface"):
-            # HF doesn't support the OpenAI default value for top_p (1)
-            logger.debug(
-                f"Setting top_p to 0.9 for Hugging Face model: {self.config.model}"
-            )
-            self.config.top_p = 0.9 if self.config.top_p == 1 else self.config.top_p
-
         # Set max_input_tokens from model info if not explicitly set
         if (
-            self.config.max_input_tokens is None
+            self._max_input_tokens is None
            and self.model_info is not None
            and "max_input_tokens" in self.model_info
            and isinstance(self.model_info["max_input_tokens"], int)
         ):
-            self.config.max_input_tokens = self.model_info["max_input_tokens"]
+            self._max_input_tokens = self.model_info["max_input_tokens"]
 
         # Set max_output_tokens from model info if not explicitly set
-        if self.config.max_output_tokens is None:
+        if self._max_output_tokens is None:
             # Special case for Claude 3.7 Sonnet models
             if any(
                 model in self.config.model
                 for model in ["claude-3-7-sonnet", "claude-3.7-sonnet"]
             ):
-                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set  # noqa: E501
+                self._max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set  # noqa: E501
             # Try to get from model info
             elif self.model_info is not None:
                 # max_output_tokens has precedence over max_tokens
                 if "max_output_tokens" in self.model_info and isinstance(
                     self.model_info["max_output_tokens"], int
                 ):
-                    self.config.max_output_tokens = self.model_info["max_output_tokens"]
+                    self._max_output_tokens = self.model_info["max_output_tokens"]
                 elif "max_tokens" in self.model_info and isinstance(
                     self.model_info["max_tokens"], int
                 ):
-                    self.config.max_output_tokens = self.model_info["max_tokens"]
+                    self._max_output_tokens = self.model_info["max_tokens"]
 
         # Initialize function calling using centralized model features
         features = get_features(self.config.model)
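
Because the config is frozen, `init_model_info` can no longer write the resolved token limits back onto `self.config`; the LLM instead keeps private copies seeded from the config and exposes them as read-only properties. A minimal sketch of that pattern, with an invented class and a simplified model-info dict:

# Sketch (assumed simplification), not the real LLM class.
class _SketchLLM:
    def __init__(self, configured_max_output: int | None):
        self._max_output_tokens = configured_max_output

    def init_model_info(self, model_info: dict) -> None:
        # Fill the limit from model info only if it wasn't configured explicitly.
        if self._max_output_tokens is None and "max_output_tokens" in model_info:
            self._max_output_tokens = model_info["max_output_tokens"]

    @property
    def max_output_tokens(self) -> int | None:
        return self._max_output_tokens


llm = _SketchLLM(configured_max_output=None)
llm.init_model_info({"max_output_tokens": 8192})
print(llm.max_output_tokens)  # 8192, resolved from model info; the config stays untouched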
