diff --git a/docs/README.md b/docs/README.md
index 0608794e7e65..0c279c19f96c 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -30,8 +30,8 @@ Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu) at
 
 Where to get started with vLLM depends on the type of user. If you are looking to:
 
 - Run open-source models on vLLM, we recommend starting with the [Quickstart Guide](./getting_started/quickstart.md)
-- Build applications with vLLM, we recommend starting with the [User Guide](./usage)
-- Build vLLM, we recommend starting with [Developer Guide](./contributing)
+- Build applications with vLLM, we recommend starting with the [User Guide](./usage/README.md)
+- Build vLLM, we recommend starting with [Developer Guide](./contributing/README.md)
 
 For information about the development of vLLM, see:
diff --git a/docs/cli/bench/latency.md b/docs/cli/bench/latency.md
index 21ab13e63781..ea7ea7321ffc 100644
--- a/docs/cli/bench/latency.md
+++ b/docs/cli/bench/latency.md
@@ -4,6 +4,6 @@
 
 --8<-- "docs/cli/json_tip.inc.md"
 
-## Options
+## Arguments
 
---8<-- "docs/argparse/bench_latency.md"
+--8<-- "docs/argparse/bench_latency.inc.md"
diff --git a/docs/cli/bench/serve.md b/docs/cli/bench/serve.md
index f7c415c6becb..f7dc8036cc26 100644
--- a/docs/cli/bench/serve.md
+++ b/docs/cli/bench/serve.md
@@ -4,6 +4,6 @@
 
 --8<-- "docs/cli/json_tip.inc.md"
 
-## Options
+## Arguments
 
---8<-- "docs/argparse/bench_serve.md"
+--8<-- "docs/argparse/bench_serve.inc.md"
diff --git a/docs/cli/bench/sweep/plot.md b/docs/cli/bench/sweep/plot.md
index f29bffb64655..a101330e093c 100644
--- a/docs/cli/bench/sweep/plot.md
+++ b/docs/cli/bench/sweep/plot.md
@@ -4,6 +4,6 @@
 
 --8<-- "docs/cli/json_tip.inc.md"
 
-## Options
+## Arguments
 
---8<-- "docs/argparse/bench_sweep_plot.md"
+--8<-- "docs/argparse/bench_sweep_plot.inc.md"
diff --git a/docs/cli/bench/sweep/serve.md b/docs/cli/bench/sweep/serve.md
index 5b5f91a951ed..f0468f06fc28 100644
--- a/docs/cli/bench/sweep/serve.md
+++ b/docs/cli/bench/sweep/serve.md
@@ -4,6 +4,6 @@
 
 --8<-- "docs/cli/json_tip.inc.md"
 
-## Options
+## Arguments
 
---8<-- "docs/argparse/bench_sweep_serve.md"
+--8<-- "docs/argparse/bench_sweep_serve.inc.md"
diff --git a/docs/cli/bench/sweep/serve_sla.md b/docs/cli/bench/sweep/serve_sla.md
index 5f8ab6005e50..5642ec67eb00 100644
--- a/docs/cli/bench/sweep/serve_sla.md
+++ b/docs/cli/bench/sweep/serve_sla.md
@@ -4,6 +4,6 @@
 
 --8<-- "docs/cli/json_tip.inc.md"
 
-## Options
+## Arguments
 
---8<-- "docs/argparse/bench_sweep_serve_sla.md"
+--8<-- "docs/argparse/bench_sweep_serve_sla.inc.md"
diff --git a/docs/cli/bench/throughput.md b/docs/cli/bench/throughput.md
index e4ff5ce43c9c..e7f618fb4d14 100644
--- a/docs/cli/bench/throughput.md
+++ b/docs/cli/bench/throughput.md
@@ -4,6 +4,6 @@
 
 --8<-- "docs/cli/json_tip.inc.md"
 
-## Options
+## Arguments
 
---8<-- "docs/argparse/bench_throughput.md"
+--8<-- "docs/argparse/bench_throughput.inc.md"
diff --git a/docs/cli/chat.md b/docs/cli/chat.md
index b006cb8de60d..0246bd431b10 100644
--- a/docs/cli/chat.md
+++ b/docs/cli/chat.md
@@ -1,5 +1,5 @@
 # vllm chat
 
-## Options
+## Arguments
 
---8<-- "docs/argparse/chat.md"
+--8<-- "docs/argparse/chat.inc.md"
diff --git a/docs/cli/complete.md b/docs/cli/complete.md
index 400359acf4fb..eb2ffdaabac2 100644
--- a/docs/cli/complete.md
+++ b/docs/cli/complete.md
@@ -1,5 +1,5 @@
 # vllm complete
 
-## Options
+## Arguments
 
---8<-- "docs/argparse/complete.md"
+--8<-- "docs/argparse/complete.inc.md"
diff --git a/docs/cli/run-batch.md b/docs/cli/run-batch.md
index f7d401b8dad2..758fbda28397 100644
--- a/docs/cli/run-batch.md
+++ b/docs/cli/run-batch.md
@@ -4,6 +4,6 @@
 
 --8<-- "docs/cli/json_tip.inc.md"
 
-## Options
+## Arguments
 
---8<-- "docs/argparse/run-batch.md"
+--8<-- "docs/argparse/run-batch.inc.md"
diff --git a/docs/cli/serve.md b/docs/cli/serve.md
index 2c8f9d320f5d..35652fec587b 100644
--- a/docs/cli/serve.md
+++ b/docs/cli/serve.md
@@ -4,6 +4,6 @@
 
 --8<-- "docs/cli/json_tip.inc.md"
 
-## Options
+## Arguments
 
---8<-- "docs/argparse/serve.md"
+--8<-- "docs/argparse/serve.inc.md"
diff --git a/docs/configuration/serve_args.md b/docs/configuration/serve_args.md
index c1cc5577bc7a..baaf21f01f06 100644
--- a/docs/configuration/serve_args.md
+++ b/docs/configuration/serve_args.md
@@ -5,7 +5,7 @@ The `vllm serve` command is used to launch the OpenAI-compatible server.
 
 ## CLI Arguments
 
 The `vllm serve` command is used to launch the OpenAI-compatible server.
-To see the available options, take a look at the [CLI Reference](../cli/README.md#options)!
+To see the available options, take a look at the [CLI Reference](../cli/README.md)!
 
 ## Configuration file
diff --git a/docs/mkdocs/hooks/generate_argparse.py b/docs/mkdocs/hooks/generate_argparse.py
index ce1c5c53cf35..735074c08b8c 100644
--- a/docs/mkdocs/hooks/generate_argparse.py
+++ b/docs/mkdocs/hooks/generate_argparse.py
@@ -1,12 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import importlib
+import importlib.metadata
+import importlib.util
 import logging
 import sys
 import traceback
-from argparse import SUPPRESS, HelpFormatter
+from argparse import SUPPRESS, Action, HelpFormatter
+from collections.abc import Iterable
+from importlib.machinery import ModuleSpec
 from pathlib import Path
-from typing import Literal
+from typing import TYPE_CHECKING, Literal
 from unittest.mock import MagicMock, patch
 
 from pydantic_core import core_schema
@@ -19,6 +22,11 @@
 sys.path.insert(0, str(ROOT_DIR))
 
 
+def mock_if_no_torch(mock_module: str, mock: MagicMock):
+    if not importlib.util.find_spec("torch"):
+        sys.modules[mock_module] = mock
+
+
 # Mock custom op code
 class MockCustomOp:
     @staticmethod
@@ -29,18 +37,21 @@ def decorator(cls):
         return decorator
 
 
-noop = lambda *a, **k: None
-sys.modules["vllm._C"] = MagicMock()
-sys.modules["vllm.model_executor.custom_op"] = MagicMock(CustomOp=MockCustomOp)
-sys.modules["vllm.utils.torch_utils"] = MagicMock(direct_register_custom_op=noop)
+mock_if_no_torch("vllm._C", MagicMock())
+mock_if_no_torch("vllm.model_executor.custom_op", MagicMock(CustomOp=MockCustomOp))
+mock_if_no_torch(
+    "vllm.utils.torch_utils", MagicMock(direct_register_custom_op=lambda *a, **k: None)
+)
+
 
 # Mock any version checks by reading from compiled CI requirements
 with open(ROOT_DIR / "requirements/test.txt") as f:
     VERSIONS = dict(line.strip().split("==") for line in f if "==" in line)
 importlib.metadata.version = lambda name: VERSIONS.get(name) or "0.0.0"
 
+
 # Make torch.nn.Parameter safe to inherit from
-sys.modules["torch.nn"] = MagicMock(Parameter=object)
+mock_if_no_torch("torch.nn", MagicMock(Parameter=object))
 
 
 class PydanticMagicMock(MagicMock):
@@ -49,31 +60,34 @@ class PydanticMagicMock(MagicMock):
     def __init__(self, *args, **kwargs):
         name = kwargs.pop("name", None)
         super().__init__(*args, **kwargs)
-        self.__spec__ = importlib.machinery.ModuleSpec(name, None)
+        self.__spec__ = ModuleSpec(name, None)
 
     def __get_pydantic_core_schema__(self, source_type, handler):
         return core_schema.any_schema()
 
 
-def auto_mock(module, attr, max_mocks=100):
+def auto_mock(module_name: str, attr: str, max_mocks: int = 100):
     """Function that automatically mocks missing modules during imports."""
-    logger.info("Importing %s from %s", attr, module)
+    logger.info("Importing %s from %s", attr, module_name)
+
     for _ in range(max_mocks):
         try:
+            module = importlib.import_module(module_name)
+
             # First treat attr as an attr, then as a submodule
-            return getattr(
-                importlib.import_module(module),
-                attr,
-                importlib.import_module(f"{module}.{attr}"),
-            )
+            if hasattr(module, attr):
+                return getattr(module, attr)
+
+            return importlib.import_module(f"{module_name}.{attr}")
         except ModuleNotFoundError as e:
+            assert e.name is not None
             logger.info("Mocking %s for argparse doc generation", e.name)
             sys.modules[e.name] = PydanticMagicMock(name=e.name)
-        except Exception as e:
-            logger.warning("Failed to import %s.%s: %s", module, attr, e)
+        except Exception:
+            logger.exception("Failed to import %s.%s", module_name, attr)
 
     raise ImportError(
-        f"Failed to import {module}.{attr} after mocking {max_mocks} imports"
+        f"Failed to import {module_name}.{attr} after mocking {max_mocks} imports"
     )
 
 
@@ -91,21 +105,26 @@ def auto_mock(module, attr, max_mocks=100):
 CompleteCommand = auto_mock("vllm.entrypoints.cli.openai", "CompleteCommand")
 openai_cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
 openai_run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
-FlexibleArgumentParser = auto_mock(
-    "vllm.utils.argparse_utils", "FlexibleArgumentParser"
-)
+
+if TYPE_CHECKING:
+    from vllm.utils.argparse_utils import FlexibleArgumentParser
+else:
+    FlexibleArgumentParser = auto_mock(
+        "vllm.utils.argparse_utils", "FlexibleArgumentParser"
+    )
 
 
 class MarkdownFormatter(HelpFormatter):
     """Custom formatter that generates markdown for argument groups."""
 
-    def __init__(self, prog, starting_heading_level=3):
-        super().__init__(prog, max_help_position=float("inf"), width=float("inf"))
+    def __init__(self, prog: str, starting_heading_level: int = 3):
+        super().__init__(prog, max_help_position=sys.maxsize, width=sys.maxsize)
+
         self._section_heading_prefix = "#" * starting_heading_level
         self._argument_heading_prefix = "#" * (starting_heading_level + 1)
         self._markdown_output = []
 
-    def start_section(self, heading):
+    def start_section(self, heading: str):
         if heading not in {"positional arguments", "options"}:
             heading_md = f"\n{self._section_heading_prefix} {heading}\n\n"
             self._markdown_output.append(heading_md)
@@ -113,14 +132,14 @@ def start_section(self, heading):
     def end_section(self):
         pass
 
-    def add_text(self, text):
+    def add_text(self, text: str):
         if text:
             self._markdown_output.append(f"{text.strip()}\n\n")
 
     def add_usage(self, usage, actions, groups, prefix=None):
         pass
 
-    def add_arguments(self, actions):
+    def add_arguments(self, actions: Iterable[Action]):
         for action in actions:
             if len(action.option_strings) == 0 or "--help" in action.option_strings:
                 continue
@@ -169,7 +188,7 @@ def create_parser(add_cli_args, **kwargs) -> FlexibleArgumentParser:
         # Auto-mock runtime imports
         if tb_list := traceback.extract_tb(e.__traceback__):
             path = Path(tb_list[-1].filename).relative_to(ROOT_DIR)
-            auto_mock(module=".".join(path.parent.parts), attr=path.stem)
+            auto_mock(module_name=".".join(path.parent.parts), attr=path.stem)
             return create_parser(add_cli_args, **kwargs)
         else:
             raise e
@@ -209,7 +228,7 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
 
     # Generate documentation for each parser
     for stem, parser in parsers.items():
-        doc_path = ARGPARSE_DOC_DIR / f"{stem}.md"
+        doc_path = ARGPARSE_DOC_DIR / f"{stem}.inc.md"
         # Specify encoding for building on Windows
         with open(doc_path, "w", encoding="utf-8") as f:
             f.write(super(type(parser), parser).format_help())
diff --git a/docs/usage/README.md b/docs/usage/README.md
index 0c63d01f0f99..4e8ece2c0605 100644
--- a/docs/usage/README.md
+++ b/docs/usage/README.md
@@ -1,6 +1,6 @@
 # Using vLLM
 
-First, vLLM must be [installed](../getting_started/installation/) for your chosen device in either a Python or Docker environment.
+First, vLLM must be [installed](../getting_started/installation/README.md) for your chosen device in either a Python or Docker environment.
 
 Then, vLLM supports the following usage patterns: