Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu) at
Where to get started with vLLM depends on the type of user. If you are looking to:

- Run open-source models on vLLM, we recommend starting with the [Quickstart Guide](./getting_started/quickstart.md)
- Build applications with vLLM, we recommend starting with the [User Guide](./usage)
- Build vLLM, we recommend starting with [Developer Guide](./contributing)
- Build applications with vLLM, we recommend starting with the [User Guide](./usage/README.md)
- Build vLLM, we recommend starting with the [Developer Guide](./contributing/README.md)

For information about the development of vLLM, see:

Expand Down
4 changes: 2 additions & 2 deletions docs/cli/bench/latency.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@

--8<-- "docs/cli/json_tip.inc.md"

## Options
## Arguments

--8<-- "docs/argparse/bench_latency.md"
--8<-- "docs/argparse/bench_latency.inc.md"
4 changes: 2 additions & 2 deletions docs/cli/bench/serve.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@

--8<-- "docs/cli/json_tip.inc.md"

## Options
## Arguments

--8<-- "docs/argparse/bench_serve.md"
--8<-- "docs/argparse/bench_serve.inc.md"
4 changes: 2 additions & 2 deletions docs/cli/bench/sweep/plot.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@

--8<-- "docs/cli/json_tip.inc.md"

## Options
## Arguments

--8<-- "docs/argparse/bench_sweep_plot.md"
--8<-- "docs/argparse/bench_sweep_plot.inc.md"
4 changes: 2 additions & 2 deletions docs/cli/bench/sweep/serve.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@

--8<-- "docs/cli/json_tip.inc.md"

## Options
## Arguments

--8<-- "docs/argparse/bench_sweep_serve.md"
--8<-- "docs/argparse/bench_sweep_serve.inc.md"
4 changes: 2 additions & 2 deletions docs/cli/bench/sweep/serve_sla.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@

--8<-- "docs/cli/json_tip.inc.md"

## Options
## Arguments

--8<-- "docs/argparse/bench_sweep_serve_sla.md"
--8<-- "docs/argparse/bench_sweep_serve_sla.inc.md"
4 changes: 2 additions & 2 deletions docs/cli/bench/throughput.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@

--8<-- "docs/cli/json_tip.inc.md"

## Options
## Arguments

--8<-- "docs/argparse/bench_throughput.md"
--8<-- "docs/argparse/bench_throughput.inc.md"
4 changes: 2 additions & 2 deletions docs/cli/chat.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# vllm chat

## Options
## Arguments

--8<-- "docs/argparse/chat.md"
--8<-- "docs/argparse/chat.inc.md"
4 changes: 2 additions & 2 deletions docs/cli/complete.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# vllm complete

## Options
## Arguments

--8<-- "docs/argparse/complete.md"
--8<-- "docs/argparse/complete.inc.md"
4 changes: 2 additions & 2 deletions docs/cli/run-batch.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@

--8<-- "docs/cli/json_tip.inc.md"

## Options
## Arguments

--8<-- "docs/argparse/run-batch.md"
--8<-- "docs/argparse/run-batch.inc.md"
4 changes: 2 additions & 2 deletions docs/cli/serve.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@

--8<-- "docs/cli/json_tip.inc.md"

## Options
## Arguments

--8<-- "docs/argparse/serve.md"
--8<-- "docs/argparse/serve.inc.md"
2 changes: 1 addition & 1 deletion docs/configuration/serve_args.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ The `vllm serve` command is used to launch the OpenAI-compatible server.
## CLI Arguments

The `vllm serve` command is used to launch the OpenAI-compatible server.
To see the available options, take a look at the [CLI Reference](../cli/README.md#options)!
To see the available options, take a look at the [CLI Reference](../cli/README.md)!

## Configuration file

Expand Down
77 changes: 48 additions & 29 deletions docs/mkdocs/hooks/generate_argparse.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import importlib
import importlib.metadata
import importlib.util
import logging
import sys
import traceback
from argparse import SUPPRESS, HelpFormatter
from argparse import SUPPRESS, Action, HelpFormatter
from collections.abc import Iterable
from importlib.machinery import ModuleSpec
from pathlib import Path
from typing import Literal
from typing import TYPE_CHECKING, Literal
from unittest.mock import MagicMock, patch

from pydantic_core import core_schema
Expand All @@ -19,6 +22,11 @@
sys.path.insert(0, str(ROOT_DIR))


def mock_if_no_torch(mock_module: str, mock: MagicMock):
    """Register ``mock`` in ``sys.modules`` only when torch cannot be found.

    Args:
        mock_module: Dotted module name to register the mock under.
        mock: Object to install as ``sys.modules[mock_module]``.
    """
    torch_spec = importlib.util.find_spec("torch")
    if torch_spec:
        # torch is importable, so leave the real modules untouched.
        return
    sys.modules[mock_module] = mock


# Mock custom op code
class MockCustomOp:
@staticmethod
Expand All @@ -29,18 +37,21 @@ def decorator(cls):
return decorator


noop = lambda *a, **k: None
sys.modules["vllm._C"] = MagicMock()
sys.modules["vllm.model_executor.custom_op"] = MagicMock(CustomOp=MockCustomOp)
sys.modules["vllm.utils.torch_utils"] = MagicMock(direct_register_custom_op=noop)
mock_if_no_torch("vllm._C", MagicMock())
mock_if_no_torch("vllm.model_executor.custom_op", MagicMock(CustomOp=MockCustomOp))
mock_if_no_torch(
"vllm.utils.torch_utils", MagicMock(direct_register_custom_op=lambda *a, **k: None)
)


# Mock any version checks by reading from compiled CI requirements
def _parse_pinned_versions(lines: Iterable[str]) -> dict[str, str]:
    """Collect ``name==version`` pins from requirements-file lines.

    Trailing ``#`` comments and ``;`` environment markers are dropped before
    parsing, and only the first ``==`` separates the name from the version, so
    a line such as ``pkg==1.0 ; python_version == "3.12"`` cannot break
    parsing. Lines without a pin are ignored.

    Args:
        lines: Raw lines of a requirements file.

    Returns:
        Mapping of package name to pinned version string.
    """
    pins: dict[str, str] = {}
    for line in lines:
        requirement = line.split("#", 1)[0].split(";", 1)[0]
        name, sep, version = requirement.partition("==")
        if sep:
            pins[name.strip()] = version.strip()
    return pins


# Specify encoding so the file is read the same way on every platform
with open(ROOT_DIR / "requirements/test.txt", encoding="utf-8") as f:
    VERSIONS = _parse_pinned_versions(f)
# Packages that are not pinned in the requirements file report "0.0.0"
importlib.metadata.version = lambda name: VERSIONS.get(name) or "0.0.0"


# Make torch.nn.Parameter safe to inherit from
sys.modules["torch.nn"] = MagicMock(Parameter=object)
mock_if_no_torch("torch.nn", MagicMock(Parameter=object))


class PydanticMagicMock(MagicMock):
Expand All @@ -49,31 +60,34 @@ class PydanticMagicMock(MagicMock):
def __init__(self, *args, **kwargs):
name = kwargs.pop("name", None)
super().__init__(*args, **kwargs)
self.__spec__ = importlib.machinery.ModuleSpec(name, None)
self.__spec__ = ModuleSpec(name, None)

def __get_pydantic_core_schema__(self, source_type, handler):
return core_schema.any_schema()


def auto_mock(module_name: str, attr: str, max_mocks: int = 100):
    """Import ``attr`` from ``module_name``, mocking missing modules on the way.

    Each attempt imports ``module_name`` and returns ``attr`` from it when the
    module has such an attribute; otherwise ``attr`` is imported as a
    submodule. When an attempt raises ``ModuleNotFoundError``, the missing
    module is replaced in ``sys.modules`` by a ``PydanticMagicMock`` and the
    import is retried. Any other exception is logged with its traceback and
    the import is retried as well.

    Args:
        module_name: Dotted name of the module to import.
        attr: Attribute or submodule of ``module_name`` to return.
        max_mocks: Maximum number of import attempts.

    Returns:
        The requested attribute or submodule.

    Raises:
        ImportError: If all ``max_mocks`` attempts failed.
    """
    logger.info("Importing %s from %s", attr, module_name)

    for _ in range(max_mocks):
        try:
            module = importlib.import_module(module_name)

            # First treat attr as an attr, then as a submodule
            if hasattr(module, attr):
                return getattr(module, attr)

            return importlib.import_module(f"{module_name}.{attr}")
        except ModuleNotFoundError as e:
            assert e.name is not None
            logger.info("Mocking %s for argparse doc generation", e.name)
            sys.modules[e.name] = PydanticMagicMock(name=e.name)
        except Exception:
            # logger.exception appends the traceback itself, so the format
            # string must only contain placeholders for the two arguments.
            logger.exception("Failed to import %s.%s", module_name, attr)

    raise ImportError(
        f"Failed to import {module_name}.{attr} after mocking {max_mocks} imports"
    )


Expand All @@ -91,36 +105,41 @@ def auto_mock(module, attr, max_mocks=100):
CompleteCommand = auto_mock("vllm.entrypoints.cli.openai", "CompleteCommand")
openai_cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
openai_run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
FlexibleArgumentParser = auto_mock(
"vllm.utils.argparse_utils", "FlexibleArgumentParser"
)

if TYPE_CHECKING:
from vllm.utils.argparse_utils import FlexibleArgumentParser
else:
FlexibleArgumentParser = auto_mock(
"vllm.utils.argparse_utils", "FlexibleArgumentParser"
)


class MarkdownFormatter(HelpFormatter):
"""Custom formatter that generates markdown for argument groups."""

def __init__(self, prog, starting_heading_level=3):
super().__init__(prog, max_help_position=float("inf"), width=float("inf"))
def __init__(self, prog: str, starting_heading_level: int = 3):
super().__init__(prog, max_help_position=sys.maxsize, width=sys.maxsize)

self._section_heading_prefix = "#" * starting_heading_level
self._argument_heading_prefix = "#" * (starting_heading_level + 1)
self._markdown_output = []

def start_section(self, heading):
def start_section(self, heading: str):
if heading not in {"positional arguments", "options"}:
heading_md = f"\n{self._section_heading_prefix} {heading}\n\n"
self._markdown_output.append(heading_md)

def end_section(self):
pass

def add_text(self, text):
def add_text(self, text: str):
if text:
self._markdown_output.append(f"{text.strip()}\n\n")

def add_usage(self, usage, actions, groups, prefix=None):
pass

def add_arguments(self, actions):
def add_arguments(self, actions: Iterable[Action]):
for action in actions:
if len(action.option_strings) == 0 or "--help" in action.option_strings:
continue
Expand Down Expand Up @@ -169,7 +188,7 @@ def create_parser(add_cli_args, **kwargs) -> FlexibleArgumentParser:
# Auto-mock runtime imports
if tb_list := traceback.extract_tb(e.__traceback__):
path = Path(tb_list[-1].filename).relative_to(ROOT_DIR)
auto_mock(module=".".join(path.parent.parts), attr=path.stem)
auto_mock(module_name=".".join(path.parent.parts), attr=path.stem)
return create_parser(add_cli_args, **kwargs)
else:
raise e
Expand Down Expand Up @@ -209,7 +228,7 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):

# Generate documentation for each parser
for stem, parser in parsers.items():
doc_path = ARGPARSE_DOC_DIR / f"{stem}.md"
doc_path = ARGPARSE_DOC_DIR / f"{stem}.inc.md"
# Specify encoding for building on Windows
with open(doc_path, "w", encoding="utf-8") as f:
f.write(super(type(parser), parser).format_help())
Expand Down
2 changes: 1 addition & 1 deletion docs/usage/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Using vLLM

First, vLLM must be [installed](../getting_started/installation/) for your chosen device in either a Python or Docker environment.
First, vLLM must be [installed](../getting_started/installation/README.md) for your chosen device in either a Python or Docker environment.

Then, vLLM supports the following usage patterns:

Expand Down