From f574d71430aab7e56ebdfceeb3d2b261c1c553df Mon Sep 17 00:00:00 2001 From: Shonda-Adena-Witherspoon Date: Tue, 6 May 2025 14:50:43 -0500 Subject: [PATCH 1/4] added server boot test files and utils for cpu vllm Signed-off-by: Shonda-Adena-Witherspoon --- .github/workflows/build.yml | 7 +++ tests/conftest.py | 116 ++++++++++++++++++++++++++++++++++++ tests/test_http_server.py | 8 +++ tests/utils.py | 48 +++++++++++++++ tox.ini | 32 +++++++++- 5 files changed, 209 insertions(+), 2 deletions(-) create mode 100644 tests/conftest.py create mode 100644 tests/test_http_server.py create mode 100644 tests/utils.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e85249e..404827e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,6 +11,13 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + - name: Install system dependencies + run: | + sudo apt-get update -y + sudo apt-get install -y gcc-12 g++-12 libnuma-dev + sudo update-alternatives \ + --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 \ + --slave /usr/bin/g++ g++ /usr/bin/g++-12 - name: Set up Python 3.11 uses: actions/setup-python@v4 with: diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..232e885 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,116 @@ +""" +Pytest fixtures for spinning up a live vllm-detector-adapter HTTP server +""" + +# Future +from __future__ import annotations + +# Standard +from collections.abc import Generator +import argparse +import asyncio +import signal +import sys +import threading +import traceback + +# Third Party +from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args +from vllm.utils import FlexibleArgumentParser +import pytest +import requests + +# Local +from .utils import TaskFailedError, get_random_port, wait_until +from vllm_detector_adapter.api_server import add_chat_detection_params, run_server +from vllm_detector_adapter.utils import 
LocalEnvVarArgumentParser


@pytest.fixture(scope="session")
def http_server_port() -> int:
    """Port for the http server"""
    return get_random_port()


@pytest.fixture(scope="session")
def http_server_url(http_server_port: int) -> str:
    """Url for the http server"""
    return f"http://localhost:{http_server_port}"


@pytest.fixture
def args(monkeypatch, http_server_port: int) -> argparse.Namespace:
    """Mimic: python -m vllm_detector_adapter.api_server --model …"""
    # Use a 'tiny' model for test purposes
    model_name = "facebook/opt-125m"

    mock_argv = [
        "__main__.py",
        "--model",
        model_name,
        f"--port={http_server_port}",
        "--host=localhost",
        "--dtype=float32",
        "--device=cpu",
    ]
    monkeypatch.setattr(sys, "argv", mock_argv, raising=False)

    # Build parser like __main__ in api.server.py
    base_parser = FlexibleArgumentParser(description="vLLM server setup for pytest.")
    parser = LocalEnvVarArgumentParser(parser=make_arg_parser(base_parser))
    parser = add_chat_detection_params(parser)
    args = parser.parse_args()
    validate_parsed_serve_args(args)
    return args


@pytest.fixture
def _servers(
    args: argparse.Namespace,
    http_server_url: str,
    monkeypatch,
) -> Generator[None, None, None]:
    """Start server in background thread"""
    loop = asyncio.new_event_loop()
    task: asyncio.Task | None = None

    # Patch signal handling so child threads don’t touch the OS handler table
    monkeypatch.setattr(loop, "add_signal_handler", lambda *args, **kwargs: None)
    monkeypatch.setattr(signal, "signal", lambda *args, **kwargs: None)

    def target() -> None:
        nonlocal task
        task = loop.create_task(run_server(args))
        try:
            print("[conftest] starting run server...", flush=True)
            loop.run_until_complete(task)
        except Exception as e:
            print("[conftest] server failed to start:", e, flush=True)
            traceback.print_exc()
            raise
        finally:
            loop.close()

    t = threading.Thread(target=target, name="vllm-detector-server")
    t.start()

    
def _health() -> bool: + if task and task.done(): + raise TaskFailedError(task.exception()) + requests.get(f"{http_server_url}/health", timeout=1).raise_for_status() + return True + + try: + wait_until(_health, timeout=120.0, interval=1.0) + # tests execute with live server + yield + finally: + if task: + task.cancel() + t.join() + + +@pytest.fixture +def api_base_url(_servers, http_server_url: str) -> str: + """Pulls up the server and returns the URL to tests""" + return http_server_url diff --git a/tests/test_http_server.py b/tests/test_http_server.py new file mode 100644 index 0000000..1f885a3 --- /dev/null +++ b/tests/test_http_server.py @@ -0,0 +1,8 @@ +# Third Party +import requests + + +def test_startup(api_base_url): + """Smoke-test: test that the servers starts and health endpoint returns a 200 status code""" + r = requests.get(f"{api_base_url}/health", timeout=5) + assert r.status_code == 200 diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..b1129c3 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,48 @@ +"""Utility helpers shared by the test suite.""" + +# Future +from __future__ import annotations + +# Standard +from typing import Callable, TypeVar +import socket +import time + +__all__ = ["get_random_port", "wait_until", "TaskFailedError"] + +T = TypeVar("T") +Predicate = Callable[[], bool] + + +class TaskFailedError(RuntimeError): + """Raised when the background server task exits unexpectedly.""" + + +def get_random_port() -> int: + """Get an unused TCP port""" + with socket.socket() as s: + s.bind(("localhost", 0)) + return s.getsockname()[1] + + +def wait_until( + predicate: Predicate, + *, + timeout: float = 30.0, + interval: float = 0.5, +) -> None: + """ + Poll predicate until it returns True or timeout seconds elapse. 
+ """ + deadline = time.monotonic() + timeout + while True: + try: + if predicate(): + return + except Exception: + pass + + if time.monotonic() >= deadline: + raise TimeoutError("Timed out waiting for condition") + + time.sleep(interval) diff --git a/tox.ini b/tox.ini index 79447ad..e79242a 100644 --- a/tox.ini +++ b/tox.ini @@ -6,7 +6,6 @@ description = run tests with pytest with coverage extras = all dev-test - vllm passenv = LOG_LEVEL LOG_FILTERS @@ -15,10 +14,35 @@ passenv = LOG_CHANNEL_WIDTH setenv = DFTYPE = pandas_all + VLLM_LOGGING_LEVEL = DEBUG + VLLM_TARGET_DEVICE=cpu -commands = pytest --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning +allowlist_externals = + git + rm + sh + +# ── BEFORE running pytest, build & install vLLM v0.8.4 CPU-only from source ── +commands_pre = + # 1) clone exactly vLLM v0.8.4 + rm -rf {envtmpdir}/vllm_source + git clone --branch v0.8.4 \ + https://github.com/vllm-project/vllm.git {envtmpdir}/vllm_source + + # 2) install its Python build deps + {envpython} -m pip install --upgrade pip + {envpython} -m pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy + {envpython} -m pip install -v -r {envtmpdir}/vllm_source/requirements/cpu.txt \ + --extra-index-url https://download.pytorch.org/whl/cpu + + # 3) build & install vLLM in CPU mode + sh -c "cd {envtmpdir}/vllm_source && VLLM_TARGET_DEVICE=cpu {envpython} setup.py install" + #{envpython} -m pip install {envtmpdir}/vllm_source + +commands = pytest -s --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning ; -W ignore::DeprecationWarning + ; Unclear: We probably want to test wheel packaging ; But! 
tox will fail when this is set and _any_ interpreter is missing ; Without this, sdist packaging is tested so that's a start. @@ -34,4 +58,8 @@ allowlist_externals = ./scripts/fmt.sh description = lint with ruff extras = dev-fmt +allowlist_externals = + git + rm + sh commands = ruff check vllm_detector_adapter From 8e0b6dcd92e4eee199905628b7bd41c23e2917d4 Mon Sep 17 00:00:00 2001 From: Shonda-Adena-Witherspoon Date: Tue, 6 May 2025 15:17:06 -0500 Subject: [PATCH 2/4] updated tox.ini environments Signed-off-by: Shonda-Adena-Witherspoon --- tox.ini | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tox.ini b/tox.ini index e79242a..a29e2f5 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ envlist = py, lint, fmt [testenv] -description = run tests with pytest with coverage +description = shared defaults for test envs extras = all dev-test @@ -22,24 +22,25 @@ allowlist_externals = rm sh -# ── BEFORE running pytest, build & install vLLM v0.8.4 CPU-only from source ── +[testenv:py] +description = run tests with pytest with coverage +# BEFORE running pytest, build & install vLLM v0.8.4 CPU-only from source commands_pre = - # 1) clone exactly vLLM v0.8.4 + # 1) Clone vLLM v0.8.4 rm -rf {envtmpdir}/vllm_source git clone --branch v0.8.4 \ https://github.com/vllm-project/vllm.git {envtmpdir}/vllm_source - # 2) install its Python build deps + # 2) Install Python build deps {envpython} -m pip install --upgrade pip {envpython} -m pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy {envpython} -m pip install -v -r {envtmpdir}/vllm_source/requirements/cpu.txt \ --extra-index-url https://download.pytorch.org/whl/cpu - # 3) build & install vLLM in CPU mode + # 3) Build & install vLLM in CPU mode sh -c "cd {envtmpdir}/vllm_source && VLLM_TARGET_DEVICE=cpu {envpython} setup.py install" - #{envpython} -m pip install {envtmpdir}/vllm_source -commands = pytest -s --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} 
--cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning +commands = pytest --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning ; -W ignore::DeprecationWarning @@ -58,8 +59,4 @@ allowlist_externals = ./scripts/fmt.sh description = lint with ruff extras = dev-fmt -allowlist_externals = - git - rm - sh commands = ruff check vllm_detector_adapter From e991c29cca8e0e43534d0a37cf3df5e2bef4f3d7 Mon Sep 17 00:00:00 2001 From: swith004 Date: Tue, 6 May 2025 18:14:37 -0400 Subject: [PATCH 3/4] fix typo Co-authored-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com> Signed-off-by: swith004 --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 232e885..06bce6e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -55,7 +55,7 @@ def args(monkeypatch, http_server_port: int) -> argparse.Namespace: ] monkeypatch.setattr(sys, "argv", mock_argv, raising=False) - # Build parser like __main__ in api.server.py + # Build parser like __main__ in api_server.py base_parser = FlexibleArgumentParser(description="vLLM server setup for pytest.") parser = LocalEnvVarArgumentParser(parser=make_arg_parser(base_parser)) parser = add_chat_detection_params(parser) From 93eb532b7c6485c9b75a986c664238e9311db3d4 Mon Sep 17 00:00:00 2001 From: Shonda-Adena-Witherspoon Date: Tue, 6 May 2025 19:18:54 -0500 Subject: [PATCH 4/4] update tox envs and mock arg script name Signed-off-by: Shonda-Adena-Witherspoon --- tests/conftest.py | 2 +- tox.ini | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 06bce6e..2e045fb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -45,7 +45,7 @@ def args(monkeypatch, http_server_port: int) -> argparse.Namespace: model_name = 
"facebook/opt-125m" mock_argv = [ - "__main__.py", + "api_server.py", "--model", model_name, f"--port={http_server_port}", diff --git a/tox.ini b/tox.ini index a29e2f5..bed334d 100644 --- a/tox.ini +++ b/tox.ini @@ -14,8 +14,6 @@ passenv = LOG_CHANNEL_WIDTH setenv = DFTYPE = pandas_all - VLLM_LOGGING_LEVEL = DEBUG - VLLM_TARGET_DEVICE=cpu allowlist_externals = git