Skip to content

Commit b2b3da3

Browse files
Add FallbackRouter for LLM failover support
This commit implements a FallbackRouter that provides automatic failover between multiple LLM models when the primary model fails.

Key features:
- Automatically falls back to secondary models on errors (rate limits, connection failures, service unavailable, etc.)
- Supports multiple fallback models in a chain
- Preserves telemetry and metrics from the active model
- Includes comprehensive logging of failover attempts

Implementation:
- New FallbackRouter class extending RouterLLM
- Overrides completion() to implement fallback logic
- Validates that 'primary' key exists in llms_for_routing
- Tracks active_llm for telemetry purposes

Tests:
- 8 comprehensive unit tests covering all scenarios
- Mocked LLM responses to avoid actual API calls
- Tests for successful completion, fallback scenarios, and error cases

Example:
- examples/01_standalone_sdk/27_llm_fallback.py demonstrates usage
- Shows how to configure primary and fallback models
- Includes logging setup to observe failover behavior

Co-authored-by: openhands <openhands@all-hands.dev>
1 parent 9652de4 commit b2b3da3

File tree

4 files changed

+524
-0
lines changed

4 files changed

+524
-0
lines changed
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
"""
2+
Example demonstrating LLM fallback functionality using FallbackRouter.
3+
4+
This example shows how to configure multiple language models with automatic
5+
fallback capability. If the primary model fails (due to rate limits, timeouts,
6+
or service unavailability), the system automatically falls back to secondary
7+
models.
8+
9+
Use cases:
10+
- High availability: Ensure your application continues working even if one
11+
provider has an outage
12+
- Rate limit handling: Automatically switch to a backup model when you hit
13+
rate limits
14+
- Cost optimization: Use expensive models as primary but have cheaper backups
15+
"""
16+
17+
import os
18+
19+
from pydantic import SecretStr
20+
21+
from openhands.sdk import (
22+
LLM,
23+
Agent,
24+
Conversation,
25+
Message,
26+
TextContent,
27+
get_logger,
28+
)
29+
from openhands.sdk.llm.router import FallbackRouter
30+
from openhands.tools.preset.default import get_default_tools
31+
32+
33+
logger = get_logger(__name__)
34+
35+
# Configure API credentials
36+
api_key = os.getenv("LLM_API_KEY")
37+
assert api_key is not None, "LLM_API_KEY environment variable is not set."
38+
model = os.getenv("LLM_MODEL", "claude-sonnet-4-20250514")
39+
base_url = os.getenv("LLM_BASE_URL")
40+
41+
# Configure primary and fallback LLMs
42+
# Primary: A powerful but potentially rate-limited model
43+
primary_llm = LLM(
44+
usage_id="primary",
45+
model=model,
46+
base_url=base_url,
47+
api_key=SecretStr(api_key),
48+
)
49+
50+
# Fallback 1: A reliable alternative model
51+
# In a real scenario, this might be a different provider or cheaper model
52+
fallback_llm = LLM(
53+
usage_id="fallback",
54+
model="openhands/devstral-small-2507",
55+
base_url=base_url,
56+
api_key=SecretStr(api_key),
57+
)
58+
59+
# Create FallbackRouter
60+
# Models will be tried in the order they appear in the dictionary
61+
# Note: The first model must have key "primary"
62+
fallback_router = FallbackRouter(
63+
usage_id="fallback-router",
64+
llms_for_routing={
65+
"primary": primary_llm,
66+
"fallback": fallback_llm,
67+
},
68+
)
69+
70+
# Configure agent with fallback router
71+
tools = get_default_tools()
72+
agent = Agent(llm=fallback_router, tools=tools)
73+
74+
# Create conversation
75+
conversation = Conversation(agent=agent, workspace=os.getcwd())
76+
77+
# Send a message - the router will automatically try primary first,
78+
# then fall back if needed
79+
conversation.send_message(
80+
message=Message(
81+
role="user",
82+
content=[
83+
TextContent(
84+
text=(
85+
"Hello! Can you tell me what the current date is? "
86+
"You can use the bash tool to run the 'date' command."
87+
)
88+
)
89+
],
90+
)
91+
)
92+
93+
# Run the conversation
94+
conversation.run()
95+
96+
# Display results
97+
print("=" * 100)
98+
print("Conversation completed successfully!")
99+
if fallback_router.active_llm:
100+
print(f"Active model used: {fallback_router.active_llm.model}")
101+
else:
102+
print("No active model (no completions made)")
103+
104+
# Report costs
105+
metrics = conversation.conversation_stats.get_combined_metrics()
106+
print(f"Total cost: ${metrics.accumulated_cost:.4f}")
107+
print(f"Total tokens: {metrics.accumulated_token_usage}")
108+
109+
print("\n" + "=" * 100)
110+
print("Key features demonstrated:")
111+
print("1. Automatic fallback when primary model fails")
112+
print("2. Transparent switching between models")
113+
print("3. Cost and usage tracking across all models")
114+
print("4. Works seamlessly with agents and tools")

openhands-sdk/openhands/sdk/llm/router/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from openhands.sdk.llm.router.base import RouterLLM
2+
from openhands.sdk.llm.router.impl.fallback import FallbackRouter
23
from openhands.sdk.llm.router.impl.multimodal import MultimodalRouter
34
from openhands.sdk.llm.router.impl.random import RandomRouter
45

@@ -7,4 +8,5 @@
78
"RouterLLM",
89
"RandomRouter",
910
"MultimodalRouter",
11+
"FallbackRouter",
1012
]
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
from collections.abc import Sequence
2+
from typing import ClassVar
3+
4+
from pydantic import model_validator
5+
6+
from openhands.sdk.llm.llm_response import LLMResponse
7+
from openhands.sdk.llm.message import Message
8+
from openhands.sdk.llm.router.base import RouterLLM
9+
from openhands.sdk.logger import get_logger
10+
from openhands.sdk.tool.tool import ToolDefinition
11+
12+
13+
logger = get_logger(__name__)
14+
15+
16+
class FallbackRouter(RouterLLM):
17+
"""
18+
A RouterLLM implementation that provides fallback capability across multiple
19+
language models. When the primary model fails due to rate limits, timeouts,
20+
or service unavailability, it automatically falls back to secondary models.
21+
22+
Models are tried in order: primary -> fallback1 -> fallback2 -> ...
23+
If all models fail, the exception from the last model is raised.
24+
25+
Example:
26+
>>> primary = LLM(model="gpt-4", usage_id="primary")
27+
>>> fallback = LLM(model="gpt-3.5-turbo", usage_id="fallback")
28+
>>> router = FallbackRouter(
29+
... usage_id="fallback-router",
30+
... llms_for_routing={"primary": primary, "fallback": fallback}
31+
... )
32+
>>> # Will try primary first, then fallback if primary fails
33+
>>> response = router.completion(messages)
34+
"""
35+
36+
router_name: str = "fallback_router"
37+
38+
PRIMARY_MODEL_KEY: ClassVar[str] = "primary"
39+
40+
def select_llm(self, messages: list[Message]) -> str: # noqa: ARG002
41+
"""
42+
For fallback router, we always start with the primary model.
43+
The fallback logic is implemented in the completion() method.
44+
"""
45+
return self.PRIMARY_MODEL_KEY
46+
47+
def completion(
48+
self,
49+
messages: list[Message],
50+
tools: Sequence[ToolDefinition] | None = None,
51+
return_metrics: bool = False,
52+
add_security_risk_prediction: bool = False,
53+
**kwargs,
54+
) -> LLMResponse:
55+
"""
56+
Try models in order until one succeeds. Falls back to next model
57+
on retry-able exceptions (rate limits, timeouts, service errors).
58+
"""
59+
# Get ordered list of model keys
60+
model_keys = list(self.llms_for_routing.keys())
61+
last_exception = None
62+
63+
for i, model_key in enumerate(model_keys):
64+
llm = self.llms_for_routing[model_key]
65+
is_last_model = i == len(model_keys) - 1
66+
67+
try:
68+
logger.info(
69+
f"FallbackRouter: Attempting completion with model "
70+
f"'{model_key}' ({llm.model})"
71+
)
72+
self.active_llm = llm
73+
74+
response = llm.completion(
75+
messages=messages,
76+
tools=tools,
77+
_return_metrics=return_metrics,
78+
add_security_risk_prediction=add_security_risk_prediction,
79+
**kwargs,
80+
)
81+
82+
logger.info(
83+
f"FallbackRouter: Successfully completed with model '{model_key}'"
84+
)
85+
return response
86+
87+
except Exception as e:
88+
last_exception = e
89+
logger.warning(
90+
f"FallbackRouter: Model '{model_key}' failed with "
91+
f"{type(e).__name__}: {str(e)}"
92+
)
93+
94+
if is_last_model:
95+
logger.error(
96+
"FallbackRouter: All models failed. Raising last exception."
97+
)
98+
raise
99+
else:
100+
next_model = model_keys[i + 1]
101+
logger.info(f"FallbackRouter: Falling back to '{next_model}'...")
102+
103+
# This should never happen, but satisfy type checker
104+
assert last_exception is not None
105+
raise last_exception
106+
107+
@model_validator(mode="after")
108+
def _validate_llms_for_routing(self) -> "FallbackRouter":
109+
"""Ensure required primary model is present in llms_for_routing."""
110+
if self.PRIMARY_MODEL_KEY not in self.llms_for_routing:
111+
raise ValueError(
112+
f"Primary LLM key '{self.PRIMARY_MODEL_KEY}' not found "
113+
"in llms_for_routing."
114+
)
115+
return self

0 commit comments

Comments
 (0)