Skip to content

Commit b2b3da3

Browse files
Add FallbackRouter for LLM failover support
This commit implements a FallbackRouter that provides automatic failover between multiple LLM models when the primary model fails.

Key features:
- Automatically falls back to secondary models on errors (rate limits, connection failures, service unavailable, etc.)
- Supports multiple fallback models in a chain
- Preserves telemetry and metrics from the active model
- Includes comprehensive logging of failover attempts

Implementation:
- New FallbackRouter class extending RouterLLM
- Overrides completion() to implement fallback logic
- Validates that 'primary' key exists in llms_for_routing
- Tracks active_llm for telemetry purposes

Tests:
- 8 comprehensive unit tests covering all scenarios
- Mocked LLM responses to avoid actual API calls
- Tests for successful completion, fallback scenarios, and error cases

Example:
- examples/01_standalone_sdk/27_llm_fallback.py demonstrates usage
- Shows how to configure primary and fallback models
- Includes logging setup to observe failover behavior

Co-authored-by: openhands <openhands@all-hands.dev>
1 parent 9652de4 commit b2b3da3

File tree

4 files changed

+524
-0
lines changed

4 files changed

+524
-0
lines changed
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
"""
2+
Example demonstrating LLM fallback functionality using FallbackRouter.
3+
4+
This example shows how to configure multiple language models with automatic
5+
fallback capability. If the primary model fails (due to rate limits, timeouts,
6+
or service unavailability), the system automatically falls back to secondary
7+
models.
8+
9+
Use cases:
10+
- High availability: Ensure your application continues working even if one
11+
provider has an outage
12+
- Rate limit handling: Automatically switch to a backup model when you hit
13+
rate limits
14+
- Cost optimization: Use expensive models as primary but have cheaper backups
15+
"""
16+
17+
import os
18+
19+
from pydantic import SecretStr
20+
21+
from openhands.sdk import (
22+
LLM,
23+
Agent,
24+
Conversation,
25+
Message,
26+
TextContent,
27+
get_logger,
28+
)
29+
from openhands.sdk.llm.router import FallbackRouter
30+
from openhands.tools.preset.default import get_default_tools
31+
32+
33+
logger = get_logger(__name__)
34+
35+
# Configure API credentials
36+
api_key = os.getenv("LLM_API_KEY")
37+
assert api_key is not None, "LLM_API_KEY environment variable is not set."
38+
model = os.getenv("LLM_MODEL", "claude-sonnet-4-20250514")
39+
base_url = os.getenv("LLM_BASE_URL")
40+
41+
# Configure primary and fallback LLMs
42+
# Primary: A powerful but potentially rate-limited model
43+
primary_llm = LLM(
44+
usage_id="primary",
45+
model=model,
46+
base_url=base_url,
47+
api_key=SecretStr(api_key),
48+
)
49+
50+
# Fallback 1: A reliable alternative model
51+
# In a real scenario, this might be a different provider or cheaper model
52+
fallback_llm = LLM(
53+
usage_id="fallback",
54+
model="openhands/devstral-small-2507",
55+
base_url=base_url,
56+
api_key=SecretStr(api_key),
57+
)
58+
59+
# Create FallbackRouter
60+
# Models will be tried in the order they appear in the dictionary
61+
# Note: The first model must have key "primary"
62+
fallback_router = FallbackRouter(
63+
usage_id="fallback-router",
64+
llms_for_routing={
65+
"primary": primary_llm,
66+
"fallback": fallback_llm,
67+
},
68+
)
69+
70+
# Configure agent with fallback router
71+
tools = get_default_tools()
72+
agent = Agent(llm=fallback_router, tools=tools)
73+
74+
# Create conversation
75+
conversation = Conversation(agent=agent, workspace=os.getcwd())
76+
77+
# Send a message - the router will automatically try primary first,
78+
# then fall back if needed
79+
conversation.send_message(
80+
message=Message(
81+
role="user",
82+
content=[
83+
TextContent(
84+
text=(
85+
"Hello! Can you tell me what the current date is? "
86+
"You can use the bash tool to run the 'date' command."
87+
)
88+
)
89+
],
90+
)
91+
)
92+
93+
# Run the conversation
94+
conversation.run()
95+
96+
# Display results
97+
print("=" * 100)
98+
print("Conversation completed successfully!")
99+
if fallback_router.active_llm:
100+
print(f"Active model used: {fallback_router.active_llm.model}")
101+
else:
102+
print("No active model (no completions made)")
103+
104+
# Report costs
105+
metrics = conversation.conversation_stats.get_combined_metrics()
106+
print(f"Total cost: ${metrics.accumulated_cost:.4f}")
107+
print(f"Total tokens: {metrics.accumulated_token_usage}")
108+
109+
print("\n" + "=" * 100)
110+
print("Key features demonstrated:")
111+
print("1. Automatic fallback when primary model fails")
112+
print("2. Transparent switching between models")
113+
print("3. Cost and usage tracking across all models")
114+
print("4. Works seamlessly with agents and tools")

openhands-sdk/openhands/sdk/llm/router/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from openhands.sdk.llm.router.base import RouterLLM
2+
from openhands.sdk.llm.router.impl.fallback import FallbackRouter
23
from openhands.sdk.llm.router.impl.multimodal import MultimodalRouter
34
from openhands.sdk.llm.router.impl.random import RandomRouter
45

@@ -7,4 +8,5 @@
78
"RouterLLM",
89
"RandomRouter",
910
"MultimodalRouter",
11+
"FallbackRouter",
1012
]
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
from collections.abc import Sequence
2+
from typing import ClassVar
3+
4+
from pydantic import model_validator
5+
6+
from openhands.sdk.llm.llm_response import LLMResponse
7+
from openhands.sdk.llm.message import Message
8+
from openhands.sdk.llm.router.base import RouterLLM
9+
from openhands.sdk.logger import get_logger
10+
from openhands.sdk.tool.tool import ToolDefinition
11+
12+
13+
logger = get_logger(__name__)
14+
15+
16+
class FallbackRouter(RouterLLM):
17+
"""
18+
A RouterLLM implementation that provides fallback capability across multiple
19+
language models. When the primary model fails due to rate limits, timeouts,
20+
or service unavailability, it automatically falls back to secondary models.
21+
22+
Models are tried in order: primary -> fallback1 -> fallback2 -> ...
23+
If all models fail, the exception from the last model is raised.
24+
25+
Example:
26+
>>> primary = LLM(model="gpt-4", usage_id="primary")
27+
>>> fallback = LLM(model="gpt-3.5-turbo", usage_id="fallback")
28+
>>> router = FallbackRouter(
29+
... usage_id="fallback-router",
30+
... llms_for_routing={"primary": primary, "fallback": fallback}
31+
... )
32+
>>> # Will try primary first, then fallback if primary fails
33+
>>> response = router.completion(messages)
34+
"""
35+
36+
router_name: str = "fallback_router"
37+
38+
PRIMARY_MODEL_KEY: ClassVar[str] = "primary"
39+
40+
def select_llm(self, messages: list[Message]) -> str: # noqa: ARG002
41+
"""
42+
For fallback router, we always start with the primary model.
43+
The fallback logic is implemented in the completion() method.
44+
"""
45+
return self.PRIMARY_MODEL_KEY
46+
47+
def completion(
48+
self,
49+
messages: list[Message],
50+
tools: Sequence[ToolDefinition] | None = None,
51+
return_metrics: bool = False,
52+
add_security_risk_prediction: bool = False,
53+
**kwargs,
54+
) -> LLMResponse:
55+
"""
56+
Try models in order until one succeeds. Falls back to next model
57+
on retry-able exceptions (rate limits, timeouts, service errors).
58+
"""
59+
# Get ordered list of model keys
60+
model_keys = list(self.llms_for_routing.keys())
61+
last_exception = None
62+
63+
for i, model_key in enumerate(model_keys):
64+
llm = self.llms_for_routing[model_key]
65+
is_last_model = i == len(model_keys) - 1
66+
67+
try:
68+
logger.info(
69+
f"FallbackRouter: Attempting completion with model "
70+
f"'{model_key}' ({llm.model})"
71+
)
72+
self.active_llm = llm
73+
74+
response = llm.completion(
75+
messages=messages,
76+
tools=tools,
77+
_return_metrics=return_metrics,
78+
add_security_risk_prediction=add_security_risk_prediction,
79+
**kwargs,
80+
)
81+
82+
logger.info(
83+
f"FallbackRouter: Successfully completed with model '{model_key}'"
84+
)
85+
return response
86+
87+
except Exception as e:
88+
last_exception = e
89+
logger.warning(
90+
f"FallbackRouter: Model '{model_key}' failed with "
91+
f"{type(e).__name__}: {str(e)}"
92+
)
93+
94+
if is_last_model:
95+
logger.error(
96+
"FallbackRouter: All models failed. Raising last exception."
97+
)
98+
raise
99+
else:
100+
next_model = model_keys[i + 1]
101+
logger.info(f"FallbackRouter: Falling back to '{next_model}'...")
102+
103+
# This should never happen, but satisfy type checker
104+
assert last_exception is not None
105+
raise last_exception
106+
107+
@model_validator(mode="after")
108+
def _validate_llms_for_routing(self) -> "FallbackRouter":
109+
"""Ensure required primary model is present in llms_for_routing."""
110+
if self.PRIMARY_MODEL_KEY not in self.llms_for_routing:
111+
raise ValueError(
112+
f"Primary LLM key '{self.PRIMARY_MODEL_KEY}' not found "
113+
"in llms_for_routing."
114+
)
115+
return self

0 commit comments

Comments
 (0)