Add pytest utils and smoke test for vllm-detector-adapter [using cpu vllm version] #72
GitHub Actions workflow:

```diff
@@ -11,6 +11,13 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update -y
+          sudo apt-get install -y gcc-12 g++-12 libnuma-dev
+          sudo update-alternatives \
+            --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 \
+            --slave /usr/bin/g++ g++ /usr/bin/g++-12
       - name: Set up Python 3.11
         uses: actions/setup-python@v4
         with:
```

Collaborator (on the system-dependencies step): Also, it seems we have split the vLLM installation in 2 parts, with gcc being installed here and the rest of it getting installed in tox. Any reason for this split?
New file (`conftest.py`), @@ -0,0 +1,116 @@:

```python
"""
Pytest fixtures for spinning up a live vllm-detector-adapter HTTP server
"""

# Future
from __future__ import annotations

# Standard
from collections.abc import Generator
import argparse
import asyncio
import signal
import sys
import threading
import traceback

# Third Party
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
from vllm.utils import FlexibleArgumentParser
import pytest
import requests

# Local
from .utils import TaskFailedError, get_random_port, wait_until
from vllm_detector_adapter.api_server import add_chat_detection_params, run_server
from vllm_detector_adapter.utils import LocalEnvVarArgumentParser


@pytest.fixture(scope="session")
def http_server_port() -> int:
    """Port for the http server"""
    return get_random_port()


@pytest.fixture(scope="session")
def http_server_url(http_server_port: int) -> str:
    """Url for the http server"""
    return f"http://localhost:{http_server_port}"


@pytest.fixture
def args(monkeypatch, http_server_port: int) -> argparse.Namespace:
    """Mimic: python -m vllm_detector_adapter.api_server --model <MODEL> …"""
    # Use a 'tiny' model for test purposes
    model_name = "facebook/opt-125m"

    mock_argv = [
        "api_server.py",
        "--model",
        model_name,
        f"--port={http_server_port}",
        "--host=localhost",
        "--dtype=float32",
        "--device=cpu",
    ]
    monkeypatch.setattr(sys, "argv", mock_argv, raising=False)

    # Build the parser the same way __main__ does in api_server.py
    base_parser = FlexibleArgumentParser(description="vLLM server setup for pytest.")
    parser = LocalEnvVarArgumentParser(parser=make_arg_parser(base_parser))
    parser = add_chat_detection_params(parser)
    args = parser.parse_args()
    validate_parsed_serve_args(args)
    return args


@pytest.fixture
def _servers(
    args: argparse.Namespace,
    http_server_url: str,
    monkeypatch,
) -> Generator[None, None, None]:
    """Start the server in a background thread"""
    loop = asyncio.new_event_loop()
    task: asyncio.Task | None = None

    # Patch signal handling so child threads don't touch the OS handler table
    monkeypatch.setattr(loop, "add_signal_handler", lambda *args, **kwargs: None)
    monkeypatch.setattr(signal, "signal", lambda *args, **kwargs: None)

    def target() -> None:
        nonlocal task
        task = loop.create_task(run_server(args))
        try:
            print("[conftest] starting run server...", flush=True)
            loop.run_until_complete(task)
        except Exception as e:
            print("[conftest] server failed to start:", e, flush=True)
            traceback.print_exc()
            raise
        finally:
            loop.close()

    t = threading.Thread(target=target, name="vllm-detector-server")
    t.start()

    def _health() -> bool:
        if task and task.done():
            raise TaskFailedError(task.exception())
        requests.get(f"{http_server_url}/health", timeout=1).raise_for_status()
        return True

    try:
        wait_until(_health, timeout=120.0, interval=1.0)
        # Tests execute against the live server
        yield
    finally:
        if task:
            task.cancel()
        t.join()


@pytest.fixture
def api_base_url(_servers, http_server_url: str) -> str:
    """Brings up the server and returns its URL to tests"""
    return http_server_url
```
New file (smoke test), @@ -0,0 +1,8 @@:

```python
# Third Party
import requests


def test_startup(api_base_url):
    """Smoke test: the server starts and the health endpoint returns a 200 status code"""
    r = requests.get(f"{api_base_url}/health", timeout=5)
    assert r.status_code == 200
```
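As a usage sketch only (not part of the diff): once the project and a CPU-capable vLLM build are installed in the current environment, the smoke test can be selected by name with a plain pytest run.

```sh
# Run from the repo root; pytest discovers the smoke test and runs only it.
python -m pytest -k test_startup -v
```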
New file (`utils.py`, shared test helpers), @@ -0,0 +1,48 @@:

```python
"""Utility helpers shared by the test suite."""

# Future
from __future__ import annotations

# Standard
from typing import Callable, TypeVar
import socket
import time

__all__ = ["get_random_port", "wait_until", "TaskFailedError"]

T = TypeVar("T")
Predicate = Callable[[], bool]


class TaskFailedError(RuntimeError):
    """Raised when the background server task exits unexpectedly."""


def get_random_port() -> int:
    """Get an unused TCP port"""
    with socket.socket() as s:
        s.bind(("localhost", 0))
        return s.getsockname()[1]


def wait_until(
    predicate: Predicate,
    *,
    timeout: float = 30.0,
    interval: float = 0.5,
) -> None:
    """
    Poll predicate until it returns True or timeout seconds elapse.
    """
    deadline = time.monotonic() + timeout
    while True:
        try:
            if predicate():
                return
        except Exception:
            pass

        if time.monotonic() >= deadline:
            raise TimeoutError("Timed out waiting for condition")

        time.sleep(interval)
```
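To make the retry semantics concrete, here is a small illustrative sketch (not part of the PR) showing that `wait_until` swallows predicate exceptions and keeps polling until the deadline; the import path is an assumption.

```python
# Illustrative only: simulate a server that becomes "healthy" after two seconds
# and wait for it with the helper above.
import threading

from utils import wait_until  # assumed import path for this sketch

ready = threading.Event()
threading.Timer(2.0, ready.set).start()  # flips to "ready" after 2 seconds


def healthy() -> bool:
    if not ready.is_set():
        # Exceptions are swallowed by wait_until and simply retried
        raise RuntimeError("not up yet")
    return True


wait_until(healthy, timeout=10.0, interval=0.5)
print("condition met")
```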
`tox.ini`:

```diff
@@ -2,11 +2,10 @@
 envlist = py, lint, fmt

 [testenv]
-description = run tests with pytest with coverage
+description = shared defaults for test envs
 extras =
     all
     dev-test
     vllm
 passenv =
     LOG_LEVEL
     LOG_FILTERS
@@ -16,9 +15,33 @@ passenv
 setenv =
     DFTYPE = pandas_all

+allowlist_externals =
+    git
+    rm
+    sh
+
+[testenv:py]
+description = run tests with pytest with coverage
+# BEFORE running pytest, build & install vLLM v0.8.4 CPU-only from source
+commands_pre =
+    # 1) Clone vLLM v0.8.4
+    rm -rf {envtmpdir}/vllm_source
+    git clone --branch v0.8.4 \
+        https://github.com/vllm-project/vllm.git {envtmpdir}/vllm_source
```
Collaborator (on lines +29 to +30, the `git clone` step): I have a few concerns here: (1) This adds another source of vLLM dependency truth vs. the pyproject.toml, which is confusing... for example, the ... (2) When changing versions here, I seem to hit some edge cases when I run this locally: the new version will seem to install, but the tests are run on the old version. For example, if I run this as is, then change the tag to ...

Collaborator (author): Q: but isn't the build-and-test-with-tox step just for the git workflow? If a user wanted to install vLLM and try the pytest suite, I thought they would install this project and whichever vLLM version they wish, and then just run pytest on test/test_http_server, for example? They should be able to run locally on whichever version then.

Collaborator: tox is used for the git workflow, but its main goal is to provide a standardized testing env, dependency management, etc. via a virtual env; developers can then also just run it locally.

Collaborator (author): In that case, a bigger issue in the tox case is that installing system dependencies such as the compilers (as done with the change I made to the git workflow) is a prerequisite that will vary from machine to machine, and it can't go in the tox.ini for that reason, but it works fine for the git workflow since it's addressed before running tox. Example: for my RHEL8 dev vm, I had to install the compilers like the following. May need more discussion here...
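The exact RHEL commands were not captured in the thread above; as a rough illustration only, installing a GCC 12 toolchain on RHEL 8 commonly looks like the following (package names are assumptions, not the author's verified commands).

```sh
# Hedged reconstruction, not the author's actual commands: RHEL 8 typically gets
# GCC 12 via the gcc-toolset-12 software collection, plus numactl-devel as the
# libnuma equivalent used by the vLLM CPU build.
sudo dnf install -y gcc-toolset-12 numactl-devel
# Put the newer gcc/g++ on PATH for the current shell before building vLLM
source /opt/rh/gcc-toolset-12/enable
gcc --version   # should report 12.x
```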
Collaborator: With this installation method for vLLM, we are somewhat missing the goal of where we want to go with this story. Essentially, we want to be able to quickly test whether a newer version of vLLM would work or not. We already have work going on in PR #67 to automatically figure out the next vLLM version update. The next step after PR #67 is to automatically update the vLLM version and test whether that works; this work, in PR #72, is aimed at making that kind of testing feasible. So, if we are installing the vLLM dependency with a hardcoded version here, it would add another place where vLLM has to be updated, and since tox is a totally different file format, it would add more complexity to the system. Additionally, even outside the use case I mentioned above, having multiple places for managing the vLLM workflow would make it harder to use such a test for more than the currently supported version, since we would need to change vLLM in multiple places every time, which becomes error prone. So we need to figure out a new approach for how we can install vLLM in CPU mode using as much of the standard package definition in pyproject.toml as possible.
The tox diff continues:

```diff
+
+    # 2) Install Python build deps
+    {envpython} -m pip install --upgrade pip
+    {envpython} -m pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy
+    {envpython} -m pip install -v -r {envtmpdir}/vllm_source/requirements/cpu.txt \
+        --extra-index-url https://download.pytorch.org/whl/cpu
+
+    # 3) Build & install vLLM in CPU mode
+    sh -c "cd {envtmpdir}/vllm_source && VLLM_TARGET_DEVICE=cpu {envpython} setup.py install"
+
 commands = pytest --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning
 ; -W ignore::DeprecationWarning

 ; Unclear: We probably want to test wheel packaging
 ; But! tox will fail when this is set and _any_ interpreter is missing
 ; Without this, sdist packaging is tested so that's a start.
```

(A review conversation on the vLLM build step was marked as resolved by swith004.)
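For context, the intended local entry point with these changes would presumably be the standard tox invocation below; this is a usage sketch, not part of the diff, and assumes tox and the system compilers from the workflow step are already installed.

```sh
# Runs commands_pre (clone + CPU-mode build of vLLM) and then pytest with
# coverage, as defined in [testenv:py] above.
python -m pip install tox
tox -e py
```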
Review comment: nit: a comment here giving more context around why we are installing gcc would be helpful.
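As a purely illustrative response to this nit (not part of the PR), the workflow step could carry an explanatory comment along these lines; the wording is an assumption based on the discussion above.

```yaml
# Sketch only: same step as in the diff, with an added comment explaining the
# gcc install. Wording is illustrative, not the author's.
- name: Install system dependencies
  run: |
    # gcc-12/g++-12 and the libnuma headers are needed because the tox 'py'
    # env builds vLLM from source in CPU mode, which requires a recent C/C++
    # toolchain and NUMA development headers.
    sudo apt-get update -y
    sudo apt-get install -y gcc-12 g++-12 libnuma-dev
```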