111 changes: 111 additions & 0 deletions examples/01_standalone_sdk/27_llm_fallback.py
@@ -0,0 +1,111 @@
"""
Example demonstrating LLM fallback functionality using FallbackRouter.

This example shows how to configure multiple language models with automatic
fallback capability. If the primary model fails (due to rate limits, timeouts,
or service unavailability), the system automatically falls back to secondary
models.

Use cases:
- High availability: Ensure your application continues working even if one
provider has an outage
- Rate limit handling: Automatically switch to a backup model when you hit
rate limits
- Cost optimization: Use expensive models as primary but have cheaper backups
"""

import os

from pydantic import SecretStr

from openhands.sdk import (
LLM,
Agent,
Conversation,
Message,
TextContent,
get_logger,
)
from openhands.sdk.llm.router import FallbackRouter
from openhands.tools.preset.default import get_default_tools


logger = get_logger(__name__)

# Configure API credentials
api_key = os.getenv("LLM_API_KEY")
assert api_key is not None, "LLM_API_KEY environment variable is not set."
model = os.getenv("LLM_MODEL", "claude-sonnet-4-20250514")
base_url = os.getenv("LLM_BASE_URL")

# Configure LLMs for fallback
# First model: A powerful but potentially rate-limited model
primary_llm = LLM(
usage_id="primary",
model=model,
base_url=base_url,
api_key=SecretStr(api_key),
)

# Second model: A reliable alternative model
# In a real scenario, this might be a different provider or cheaper model
fallback_llm = LLM(
usage_id="fallback",
model="openhands/devstral-small-2507",
base_url=base_url,
api_key=SecretStr(api_key),
)

# Create FallbackRouter with a list of LLMs
# Models will be tried in the order they appear in the list
# Similar to how litellm handles fallbacks
fallback_router = FallbackRouter(
usage_id="fallback-router",
llms=[primary_llm, fallback_llm],
)

# Configure agent with fallback router
tools = get_default_tools()
agent = Agent(llm=fallback_router, tools=tools)

# Create conversation
conversation = Conversation(agent=agent, workspace=os.getcwd())

# Send a message - the router will automatically try models in order,
# falling back if one fails
conversation.send_message(
message=Message(
role="user",
content=[
TextContent(
text=(
"Hello! Can you tell me what the current date is? "
"You can use the bash tool to run the 'date' command."
)
)
],
)
)

# Run the conversation
conversation.run()

# Display results
print("=" * 100)
print("Conversation completed successfully!")
if fallback_router.active_llm:
print(f"Active model used: {fallback_router.active_llm.model}")
else:
print("No active model (no completions made)")

# Report costs
metrics = conversation.conversation_stats.get_combined_metrics()
print(f"Total cost: ${metrics.accumulated_cost:.4f}")
print(f"Total tokens: {metrics.accumulated_token_usage}")

print("\n" + "=" * 100)
print("Key features demonstrated:")
print("1. Automatic fallback when primary model fails")
print("2. Transparent switching between models")
print("3. Cost and usage tracking across all models")
print("4. Works seamlessly with agents and tools")
2 changes: 2 additions & 0 deletions openhands-sdk/openhands/sdk/llm/router/__init__.py
@@ -1,4 +1,5 @@
from openhands.sdk.llm.router.base import RouterLLM
from openhands.sdk.llm.router.impl.fallback import FallbackRouter
from openhands.sdk.llm.router.impl.multimodal import MultimodalRouter
from openhands.sdk.llm.router.impl.random import RandomRouter

@@ -7,4 +8,5 @@
"RouterLLM",
"RandomRouter",
"MultimodalRouter",
"FallbackRouter",
]
121 changes: 121 additions & 0 deletions openhands-sdk/openhands/sdk/llm/router/impl/fallback.py
@@ -0,0 +1,121 @@
from collections.abc import Sequence

from pydantic import field_validator, model_validator

from openhands.sdk.llm.llm import LLM
from openhands.sdk.llm.llm_response import LLMResponse
from openhands.sdk.llm.message import Message
from openhands.sdk.llm.router.base import RouterLLM
from openhands.sdk.logger import get_logger
from openhands.sdk.tool.tool import ToolDefinition


logger = get_logger(__name__)


class FallbackRouter(RouterLLM):
"""
A RouterLLM implementation that provides fallback capability across multiple
language models. When the first model fails due to rate limits, timeouts,
or service unavailability, it automatically falls back to subsequent models.

Similar to litellm's fallback approach, models are tried in the order provided.
If all models fail, the exception from the last model is raised.

Example:
>>> primary = LLM(model="gpt-4", usage_id="primary")
>>> fallback = LLM(model="gpt-3.5-turbo", usage_id="fallback")
>>> router = FallbackRouter(
... usage_id="fallback-router",
... llms=[primary, fallback]
... )
>>> # Will try models in order until one succeeds
>>> response = router.completion(messages)
"""

router_name: str = "fallback_router"
llms: list[LLM]

@model_validator(mode="before")
@classmethod
def _convert_llms_to_routing(cls, values: dict) -> dict:
"""Convert llms list to llms_for_routing dict for base class compatibility."""
if "llms" in values and "llms_for_routing" not in values:
llms = values["llms"]
values["llms_for_routing"] = {f"llm_{i}": llm for i, llm in enumerate(llms)}
return values

@field_validator("llms")
@classmethod
def _validate_llms(cls, llms: list[LLM]) -> list[LLM]:
"""Ensure at least one LLM is provided."""
if not llms:
raise ValueError("FallbackRouter requires at least one LLM")
return llms

def select_llm(self, messages: list[Message]) -> str: # noqa: ARG002
"""
For fallback router, we always start with the first model.
The fallback logic is implemented in the completion() method.
"""
return "llm_0"

def completion(
self,
messages: list[Message],
tools: Sequence[ToolDefinition] | None = None,
return_metrics: bool = False,
add_security_risk_prediction: bool = False,
**kwargs,
) -> LLMResponse:
"""
        Try models in order until one succeeds. Falls back to the next model
        on any exception (e.g., rate limits, timeouts, service errors).
"""
last_exception = None

for i, llm in enumerate(self.llms):
is_last_model = i == len(self.llms) - 1

try:
logger.info(
f"FallbackRouter: Attempting completion with model "
f"{i + 1}/{len(self.llms)} ({llm.model}, usage_id={llm.usage_id})"
)
self.active_llm = llm

response = llm.completion(
messages=messages,
tools=tools,
_return_metrics=return_metrics,
add_security_risk_prediction=add_security_risk_prediction,
**kwargs,
)

logger.info(
f"FallbackRouter: Successfully completed with model "
f"{llm.model} (usage_id={llm.usage_id})"
)
return response

except Exception as e:
last_exception = e
logger.warning(
f"FallbackRouter: Model {llm.model} (usage_id={llm.usage_id}) "
f"failed with {type(e).__name__}: {str(e)}"
)

if is_last_model:
logger.error(
"FallbackRouter: All models failed. Raising last exception."
)
raise
else:
logger.info(
"FallbackRouter: Falling back to model "
f"{i + 2}/{len(self.llms)}..."
)

        # Unreachable in practice (the loop either returns or re-raises);
        # kept to satisfy the type checker.
assert last_exception is not None
raise last_exception
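A short test-style sketch of the fallback behaviour, with no network calls. Assumptions: LLM instances can be constructed without credentials, as in the class docstring above; patching LLM.completion at the class level is acceptable here since both wrapped instances share it; and the exception type and message are arbitrary stand-ins for a real provider failure.

# Test-style sketch of the fallback order (assumptions noted above).
from unittest.mock import MagicMock, patch

from openhands.sdk import LLM, Message, TextContent
from openhands.sdk.llm.router import FallbackRouter


def test_falls_back_when_primary_raises():
    primary = LLM(model="gpt-4", usage_id="primary")
    backup = LLM(model="gpt-3.5-turbo", usage_id="backup")
    router = FallbackRouter(usage_id="fallback-router", llms=[primary, backup])

    sentinel = MagicMock(name="llm_response")
    # First call (primary) raises; second call (backup) returns the sentinel.
    with patch.object(
        LLM, "completion", side_effect=[RuntimeError("rate limited"), sentinel]
    ):
        result = router.completion(
            messages=[Message(role="user", content=[TextContent(text="hi")])]
        )

    assert result is sentinel
    assert router.active_llm is not None
    assert router.active_llm.usage_id == "backup"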