-
Notifications
You must be signed in to change notification settings - Fork 204
Add SGLang Router Support #3267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
3c08dc7
f191132
23d3e69
e6c2bcb
178d5a5
b8794f1
5699556
a475858
741c0ea
d7b21f9
ebd88b8
c7a6c01
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| from enum import Enum | ||
| from typing import Union | ||
|
|
||
| from pydantic import Field | ||
| from typing_extensions import Annotated, Literal | ||
|
|
||
| from dstack._internal.core.models.common import CoreModel | ||
|
|
||
|
|
||
class RouterType(str, Enum):
    """Identifiers of the supported router implementations.

    Members also subclass ``str``, so each one compares equal to its
    plain string value.
    """

    SGLANG = "sglang"
    VLLM = "vllm"
jvstme marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
class SGLangRouterConfig(CoreModel):
    """Router configuration selected by ``type: sglang``."""

    # Discriminator value; fixed to "sglang" for this model.
    type: Literal["sglang"] = "sglang"
    # Name of the routing policy; defaults to "cache_aware".
    policy: str = "cache_aware"
jvstme marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
class VLLMRouterConfig(CoreModel):
    """Router configuration selected by ``type: vllm``."""

    # Discriminator value; fixed to "vllm" for this model.
    type: Literal["vllm"] = "vllm"
    # Name of the routing policy; defaults to "cache_aware".
    policy: str = "cache_aware"
|
|
||
|
|
||
# Tagged union of every router configuration model; the `type` field is the
# discriminator that selects the concrete model.
AnyRouterConfig = Annotated[
    Union[SGLangRouterConfig, VLLMRouterConfig],
    Field(discriminator="type"),
]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| from typing import Dict, List, Optional, Type | ||
|
|
||
| from dstack._internal.core.models.routers import AnyRouterConfig, RouterType | ||
| from dstack._internal.utils.logging import get_logger | ||
|
|
||
| from .base import Replica, Router, RouterContext | ||
|
|
||
logger = get_logger(__name__)

# This provides a registry of available router implementations.
_ROUTER_CLASSES: List[Type[Router]] = []

# The SGLang router is registered only when its module can be imported;
# otherwise a warning is logged and the registry is left without it.
try:
    from dstack._internal.proxy.gateway.model_routers.sglang import SglangRouter
except ImportError as e:
    logger.warning("SGLang router not available: %s", e)
else:
    _ROUTER_CLASSES.append(SglangRouter)
    logger.debug("Registered SglangRouter")
jvstme marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
# Lookup table from router type to the class implementing it, built once at
# import time from the registered router classes.
_ROUTER_TYPE_TO_CLASS_MAP: Dict[RouterType, Type[Router]] = {}

for router_class in _ROUTER_CLASSES:
    router_type_str = getattr(router_class, "TYPE", None)
    if router_type_str is None:
        # Lazy %-style arguments, matching the other log calls in this module.
        logger.warning(
            "Router class %s missing TYPE attribute, skipping", router_class.__name__
        )
        continue
    # RouterType(...) raises ValueError if TYPE is not a known router type.
    router_type = RouterType(router_type_str)
    _ROUTER_TYPE_TO_CLASS_MAP[router_type] = router_class

# Router types that have a registered implementation (used in error messages).
_AVAILABLE_ROUTER_TYPES = list(_ROUTER_TYPE_TO_CLASS_MAP)
|
|
||
|
|
||
def get_router_class(router_type: RouterType) -> Optional[Type[Router]]:
    """Look up the router class registered for ``router_type``.

    Args:
        router_type: The router type to resolve.

    Returns:
        The registered router class, or ``None`` when no implementation is
        registered for that type.
    """
    router_class = _ROUTER_TYPE_TO_CLASS_MAP.get(router_type)
    return router_class
|
|
||
|
|
||
def get_router(router: AnyRouterConfig, context: Optional[RouterContext] = None) -> Router:
    """Build a router instance from a router configuration.

    Args:
        router: Router configuration; its ``type`` selects the implementation.
        context: Optional runtime context handed to the router constructor.

    Returns:
        An instance of the router class registered for ``router.type``.

    Raises:
        ValueError: If ``router.type`` is not a valid router type or no
            implementation is registered for it.
    """
    requested_type = RouterType(router.type)
    implementation = get_router_class(requested_type)

    if implementation is not None:
        # Router implementations may have different constructor signatures;
        # SglangRouter takes (router, context), others might differ.
        return implementation(router=router, context=context)

    registered = [available.value for available in _AVAILABLE_ROUTER_TYPES]
    raise ValueError(
        f"Router type '{requested_type.value}' is not available. "
        f"Available types: {registered}"
    )
|
|
||
|
|
||
# Names exported as the public API of this package.
__all__ = ["Router", "RouterContext", "Replica", "get_router"]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,147 @@ | ||
| from abc import ABC, abstractmethod | ||
| from pathlib import Path | ||
| from typing import List, Literal, Optional | ||
|
|
||
| from pydantic import BaseModel | ||
|
|
||
| from dstack._internal.core.models.routers import AnyRouterConfig | ||
|
|
||
|
|
||
class RouterContext(BaseModel):
    """Settings used when a router is initialized and configured."""

    class Config:
        # Instances cannot be modified after creation.
        frozen = True

    # Host and port settings for the router.
    host: str = "127.0.0.1"
    port: int = 3000
    # Logging settings: directory for log files and log verbosity.
    log_dir: Path = Path("./router_logs")
    log_level: Literal["debug", "info", "warning", "error"] = "info"
|
|
||
|
|
||
class Replica(BaseModel):
    """A single replica (worker) endpoint managed by the router.

    The ``model`` field identifies which model the replica serves. In
    SGLang it is the model id (e.g., "meta-llama/Meta-Llama-3.1-8B-Instruct").
    """

    # HTTP URL where the replica is accessible (e.g., "http://127.0.0.1:10001").
    url: str
    # Model served by the replica (e.g., "meta-llama/Meta-Llama-3.1-8B-Instruct").
    model: str
|
|
||
|
|
||
class Router(ABC):
    """Abstract interface for router implementations (e.g., SGLang, vLLM).

    A router manages the lifecycle of worker replicas and handles request
    routing. Concrete implementations may manage replicas through different
    mechanisms.
    """

    def __init__(
        self,
        router: Optional[AnyRouterConfig] = None,
        context: Optional[RouterContext] = None,
    ):
        """Initialize the router with its runtime context.

        Args:
            router: Optional router configuration (implementation-specific).
                This base initializer does not store it.
            context: Runtime context for the router (host, port, logging,
                etc.). A default ``RouterContext`` is created when omitted.
        """
        self.context = context if context else RouterContext()

    @abstractmethod
    def start(self) -> None:
        """Start the router process.

        Raises:
            Exception: If the router fails to start.
        """
        ...

    @abstractmethod
    def stop(self) -> None:
        """Stop the router process.

        Raises:
            Exception: If the router fails to stop.
        """
        ...

    @abstractmethod
    def is_running(self) -> bool:
        """Report whether the router is currently running and responding.

        Returns:
            True if the router is running and healthy, False otherwise.
        """
        ...

    @abstractmethod
    def register_replicas(
        self, domain: str, num_replicas: int, model_id: Optional[str] = None
    ) -> List[Replica]:
        """Register replicas to a domain (allocate ports/URLs for workers).

        Args:
            domain: The domain name for this service.
            num_replicas: The number of replicas to allocate for this domain.
            model_id: Optional model identifier
                (e.g., "meta-llama/Meta-Llama-3.1-8B-Instruct"). Required only
                for routers that support IGW (Inference Gateway) mode for
                multi-model serving.

        Returns:
            List of Replica objects with allocated URLs and model_id set
            (if provided).

        Raises:
            Exception: If allocation fails.
        """
        ...

    @abstractmethod
    def unregister_replicas(self, domain: str) -> None:
        """Unregister replicas for a domain.

        Removes the model and unassigns all of its replicas.

        Args:
            domain: The domain name for this service.

        Raises:
            Exception: If removal fails or the domain is not found.
        """
        ...

    @abstractmethod
    def add_replicas(self, replicas: List[Replica]) -> None:
        """Register replicas with the router (actual API calls to add workers).

        Args:
            replicas: The list of replicas to add to the router.

        Raises:
            Exception: If adding replicas fails.
        """
        ...

    @abstractmethod
    def remove_replicas(self, replicas: List[Replica]) -> None:
        """Unregister replicas from the router (actual API calls to remove workers).

        Args:
            replicas: The list of replicas to remove from the router.

        Raises:
            Exception: If removing replicas fails.
        """
        ...

    @abstractmethod
    def update_replicas(self, replicas: List[Replica]) -> None:
        """Update replicas for a service, replacing the current set.

        Args:
            replicas: The new list of replicas for this service.

        Raises:
            Exception: If updating replicas fails.
        """
        ...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm certain it's not supposed to be hard-coded, we need to get the dynamic URL back.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, you are right. In production it will not be hard-coded; the hard-coded URL will be replaced by
return f"{base_url}/dstack_gateway-{build}-py3-none-any.whl"