import os
from typing import Any

from pydantic import BaseModel, ConfigDict, Field, SecretStr

from openhands.core.logger import get_logger, ENV_LOG_DIR

logger = get_logger(__name__)


class LLMConfig(BaseModel):
    """Configuration for the LLM model.

    Attributes:
        model: The model to use.
        api_key: The API key to use.
        base_url: The base URL for the API. This is necessary for local LLMs.
        api_version: The version of the API.
        aws_access_key_id: The AWS access key ID.
        aws_secret_access_key: The AWS secret access key.
        aws_region_name: The AWS region name.
        openrouter_site_url: The site URL passed to OpenRouter (exported as OR_SITE_URL).
        openrouter_app_name: The app name passed to OpenRouter (exported as OR_APP_NAME).
        num_retries: The number of retries to attempt.
        retry_multiplier: The multiplier for the exponential backoff.
        retry_min_wait: The minimum time to wait between retries, in seconds. This is the exponential backoff minimum. For models with very low rate limits, this can be set to 15-20.
        retry_max_wait: The maximum time to wait between retries, in seconds. This is the exponential backoff maximum.
        timeout: The timeout for the API.
        max_message_chars: The approximate maximum number of characters in the content of an event included in the prompt to the LLM. Larger observations are truncated.
        temperature: The temperature for the API.
        top_p: The top p for the API.
        top_k: The top k for the API.
        custom_llm_provider: The custom LLM provider to use. This is undocumented in OpenHands and normally not used; it is documented on the litellm side.
        max_input_tokens: The maximum number of input tokens. Note that this is currently unused; the value used at runtime is actually the model's total token limit (e.g. 128,000 tokens for GPT-4).
        max_output_tokens: The maximum number of output tokens. This is sent to the LLM.
        input_cost_per_token: The cost per input token. This will be available in logs for the user to check.
        output_cost_per_token: The cost per output token. This will be available in logs for the user to check.
        ollama_base_url: The base URL for the OLLAMA API.
        drop_params: Drop any unmapped (unsupported) params without raising an exception.
        modify_params: Allow litellm to apply transformations such as adding a default message when a message is empty.
        disable_vision: If the model is vision capable, this option allows image processing to be disabled (useful for cost reduction).
        disable_stop_word: Whether to disable the use of the stop word.
        caching_prompt: Use the prompt caching feature if provided by the LLM and supported by the provider.
        log_completions: Whether to log LLM completions to the state.
        log_completions_folder: The folder to log LLM completions to. Required if log_completions is True.
        custom_tokenizer: A custom tokenizer to use for token counting.
        native_tool_calling: Whether to use native tool calling if supported by the model. Can be True, False, or not set.
        reasoning_effort: The effort to put into reasoning. One of 'low', 'medium', 'high', or 'none'. Can apply to all reasoning models.
        seed: The seed to use for the LLM.
        safety_settings: Safety settings for models that support them (like Mistral AI and Gemini).
    """

    model: str = Field(default="claude-sonnet-4-20250514")
    api_key: SecretStr | None = Field(default=None)
    base_url: str | None = Field(default=None)
    api_version: str | None = Field(default=None)
    aws_access_key_id: SecretStr | None = Field(default=None)
    aws_secret_access_key: SecretStr | None = Field(default=None)
    aws_region_name: str | None = Field(default=None)
    openrouter_site_url: str = Field(default="https://docs.all-hands.dev/")
    openrouter_app_name: str = Field(default="OpenHands")
    # total wait time: 8 + 16 + 32 + 64 = 120 seconds
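    # (i.e. each wait is assumed to double from retry_min_wait up to retry_max_wait)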
    num_retries: int = Field(default=5)
    retry_multiplier: float = Field(default=8)
    retry_min_wait: int = Field(default=8)
    retry_max_wait: int = Field(default=64)
    timeout: int | None = Field(default=None)
    max_message_chars: int = Field(
        default=30_000
    )  # maximum number of characters in an observation's content when sent to the llm
    temperature: float = Field(default=0.0)
    top_p: float = Field(default=1.0)
    top_k: float | None = Field(default=None)
    custom_llm_provider: str | None = Field(default=None)
    max_input_tokens: int | None = Field(default=None)
    max_output_tokens: int | None = Field(default=None)
    input_cost_per_token: float | None = Field(default=None)
    output_cost_per_token: float | None = Field(default=None)
    ollama_base_url: str | None = Field(default=None)
    # This setting can be sent in each call to litellm
    drop_params: bool = Field(default=True)
    # Note: this setting is actually global, unlike drop_params
    modify_params: bool = Field(default=True)
    disable_vision: bool | None = Field(default=None)
    disable_stop_word: bool | None = Field(default=False)
    caching_prompt: bool = Field(default=True)
    log_completions: bool = Field(default=False)
    log_completions_folder: str = Field(
        default=os.path.join(ENV_LOG_DIR, "completions")
    )
    custom_tokenizer: str | None = Field(default=None)
    native_tool_calling: bool | None = Field(default=None)
    reasoning_effort: str | None = Field(default=None)
    seed: int | None = Field(default=None)
    safety_settings: list[dict[str, str]] | None = Field(
        default=None,
        description="Safety settings for models that support them (like Mistral AI and Gemini)",
    )

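    # extra="forbid": unknown keys in the config raise a pydantic ValidationError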
    model_config = ConfigDict(extra="forbid")

    def model_post_init(self, __context: Any) -> None:
        """Post-initialization hook.

        Exports OpenRouter and AWS settings to environment variables so they are
        accessible to litellm at runtime, and fills in model-specific defaults.
        """
        super().model_post_init(__context)

        # Assign OpenRouter-specific variables to environment variables
        if self.openrouter_site_url:
            os.environ["OR_SITE_URL"] = self.openrouter_site_url
        if self.openrouter_app_name:
            os.environ["OR_APP_NAME"] = self.openrouter_app_name

        # Default reasoning_effort to 'high' for all models except Gemini 2.5 Pro,
        # which uses its optimized thinking budget when reasoning_effort is None
        if self.reasoning_effort is None and "gemini-2.5-pro" not in self.model:
            self.reasoning_effort = "high"

        # Set an API version by default for Azure models
        # Required for newer models.
        # Azure issue: https://github.com/All-Hands-AI/OpenHands/issues/7755
        if self.model.startswith("azure") and self.api_version is None:
            self.api_version = "2024-12-01-preview"

        # Set AWS credentials as environment variables for LiteLLM Bedrock
        if self.aws_access_key_id:
            os.environ["AWS_ACCESS_KEY_ID"] = self.aws_access_key_id.get_secret_value()
        if self.aws_secret_access_key:
            os.environ["AWS_SECRET_ACCESS_KEY"] = (
                self.aws_secret_access_key.get_secret_value()
            )
        if self.aws_region_name:
            os.environ["AWS_REGION_NAME"] = self.aws_region_name
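
# A minimal usage sketch (not part of the original module; the key below is a
# placeholder). SecretStr keeps api_key masked when the config is printed or
# logged; .get_secret_value() returns the raw value only where it is needed.
#
#     config = LLMConfig(
#         model="claude-sonnet-4-20250514",
#         api_key=SecretStr("sk-placeholder"),
#         num_retries=3,
#     )
#     print(config.api_key)              # **********
#     config.api_key.get_secret_value()  # "sk-placeholder"
#     # Unknown keys are rejected because extra="forbid":
#     # LLMConfig(unknown_option=1)  -> raises pydantic.ValidationError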