7 changes: 7 additions & 0 deletions .github/workflows/build.yml
@@ -11,6 +11,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install system dependencies
run: |
sudo apt-get update -y
Collaborator:
nit: a comment here giving more context around why we are installing gcc would be helpful
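For instance, a suggested wording (assuming the toolchain is only needed to compile vLLM's CPU backend from source during the tox run):

# gcc-12/g++-12 and the libnuma headers are required to build vLLM
# from source in CPU-only mode (see the tox py env's commands_pre)
sudo apt-get install -y gcc-12 g++-12 libnuma-dev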

sudo apt-get install -y gcc-12 g++-12 libnuma-dev
Collaborator:
Also, it seems we have split the vLLM installation into two parts, with gcc installed here and the rest installed in tox.

Any reason for this split?

sudo update-alternatives \
--install /usr/bin/gcc gcc /usr/bin/gcc-12 10 \
--slave /usr/bin/g++ g++ /usr/bin/g++-12
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
116 changes: 116 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,116 @@
"""
Pytest fixtures for spinning up a live vllm-detector-adapter HTTP server
"""

# Future
from __future__ import annotations

# Standard
from collections.abc import Generator
import argparse
import asyncio
import signal
import sys
import threading
import traceback

# Third Party
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
from vllm.utils import FlexibleArgumentParser
import pytest
import requests

# Local
from .utils import TaskFailedError, get_random_port, wait_until
from vllm_detector_adapter.api_server import add_chat_detection_params, run_server
from vllm_detector_adapter.utils import LocalEnvVarArgumentParser


@pytest.fixture(scope="session")
def http_server_port() -> int:
"""Port for the http server"""
return get_random_port()


@pytest.fixture(scope="session")
def http_server_url(http_server_port: int) -> str:
"""Url for the http server"""
return f"http://localhost:{http_server_port}"


@pytest.fixture
def args(monkeypatch, http_server_port: int) -> argparse.Namespace:
"""Mimic: python -m vllm_detector_adapter.api_server --model <MODEL> …"""
# Use a 'tiny' model for test purposes
model_name = "facebook/opt-125m"

mock_argv = [
"api_server.py",
"--model",
model_name,
f"--port={http_server_port}",
"--host=localhost",
"--dtype=float32",
"--device=cpu",
]
monkeypatch.setattr(sys, "argv", mock_argv, raising=False)

# Build parser like __main__ in api_server.py
base_parser = FlexibleArgumentParser(description="vLLM server setup for pytest.")
parser = LocalEnvVarArgumentParser(parser=make_arg_parser(base_parser))
parser = add_chat_detection_params(parser)
args = parser.parse_args()
validate_parsed_serve_args(args)
return args


@pytest.fixture
def _servers(
args: argparse.Namespace,
http_server_url: str,
monkeypatch,
) -> Generator[None, None, None]:
"""Start server in background thread"""
loop = asyncio.new_event_loop()
task: asyncio.Task | None = None

# Patch signal handling so child threads don’t touch the OS handler table
monkeypatch.setattr(loop, "add_signal_handler", lambda *args, **kwargs: None)
monkeypatch.setattr(signal, "signal", lambda *args, **kwargs: None)

def target() -> None:
nonlocal task
task = loop.create_task(run_server(args))
try:
print("[conftest] starting run server...", flush=True)
loop.run_until_complete(task)
except Exception as e:
print("[conftest] server failed to start:", e, flush=True)
traceback.print_exc()
raise
finally:
loop.close()

t = threading.Thread(target=target, name="vllm-detector-server")
t.start()

def _health() -> bool:
if task and task.done():
raise TaskFailedError(task.exception())
requests.get(f"{http_server_url}/health", timeout=1).raise_for_status()
return True

try:
wait_until(_health, timeout=120.0, interval=1.0)
# tests execute with live server
yield
finally:
if task:
task.cancel()
t.join()


@pytest.fixture
def api_base_url(_servers, http_server_url: str) -> str:
"""Pulls up the server and returns the URL to tests"""
return http_server_url
8 changes: 8 additions & 0 deletions tests/test_http_server.py
@@ -0,0 +1,8 @@
# Third Party
import requests


def test_startup(api_base_url):
"""Smoke-test: test that the servers starts and health endpoint returns a 200 status code"""
r = requests.get(f"{api_base_url}/health", timeout=5)
assert r.status_code == 200
48 changes: 48 additions & 0 deletions tests/utils.py
@@ -0,0 +1,48 @@
"""Utility helpers shared by the test suite."""

# Future
from __future__ import annotations

# Standard
from typing import Callable, TypeVar
import socket
import time

__all__ = ["get_random_port", "wait_until", "TaskFailedError"]

T = TypeVar("T")
Predicate = Callable[[], bool]


class TaskFailedError(RuntimeError):
"""Raised when the background server task exits unexpectedly."""


def get_random_port() -> int:
"""Get an unused TCP port"""
with socket.socket() as s:
s.bind(("localhost", 0))
return s.getsockname()[1]


def wait_until(
predicate: Predicate,
*,
timeout: float = 30.0,
interval: float = 0.5,
) -> None:
"""
Poll predicate until it returns True or timeout seconds elapse.
"""
deadline = time.monotonic() + timeout
while True:
try:
if predicate():
return
except Exception:
pass

if time.monotonic() >= deadline:
raise TimeoutError("Timed out waiting for condition")

time.sleep(interval)
27 changes: 25 additions & 2 deletions tox.ini
@@ -2,11 +2,10 @@
envlist = py, lint, fmt

[testenv]
description = run tests with pytest with coverage
description = shared defaults for test envs
extras =
all
dev-test
vllm
passenv =
LOG_LEVEL
LOG_FILTERS
@@ -16,9 +15,33 @@ passenv =
setenv =
DFTYPE = pandas_all

allowlist_externals =
git
rm
sh

[testenv:py]
description = run tests with pytest with coverage
# BEFORE running pytest, build & install vLLM v0.8.4 CPU-only from source
commands_pre =
# 1) Clone vLLM v0.8.4
rm -rf {envtmpdir}/vllm_source
git clone --branch v0.8.4 \
https://github.com/vllm-project/vllm.git {envtmpdir}/vllm_source
Comment on lines +29 to +30
Collaborator:
I have a few concerns here:

(1) This adds another source of truth for the vllm dependency alongside pyproject.toml, which is confusing. For example, the v0.8.4 branch cannot run on Mac (as noted in a comment), so if I run tox -e py on my machine I now get errors. Can we leverage the pyproject.toml info?

(2) When changing versions here, I hit some edge cases running locally: the new version appears to install, but the tests run against the old one. For example, if I run this as is and then change the tag to v0.8.5, I would expect API breakages, yet the tests pass. Conversely, if I start over completely (i.e., delete the tox folder with all dependencies) and run with 0.8.5, I get import errors as expected, but if I then switch to 0.8.3 I still get the same errors. It would be good to let developers run tests locally, dependably, across multiple versions.
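A sketch of a blunt workaround for (2): force tox to recreate the environment so a previously built vLLM cannot leak into the next run.

tox -r -e py    # -r rebuilds the virtualenv from scratch; slower, but deterministic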

Collaborator (Author):

Q: But isn't the build-and-test with tox just for the git workflow? If users want to test locally, I thought they would install this project plus whichever vLLM version they wish and then just run, e.g., pytest tests/test_http_server.py. They should then be able to run locally on whichever version they like.
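For illustration, the local flow described above might look like the following (the vLLM version shown is just an example, not a pin from this repo):

pip install -e .                  # install vllm-detector-adapter itself
pip install vllm==0.8.4           # or any other vLLM version to test against
pytest tests/test_http_server.py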

Collaborator:

tox is used for the git workflow, but its main goal is to provide a standardized testing environment, dependency management, etc. via a virtualenv - developers can then also just run tox -e py with less worry about differing dependencies. It'd be good for developers to be able to easily reproduce errors seen in the git workflow.

Collaborator (Author) @swith004, May 7, 2025:

In that case, a bigger issue with tox is that installing system dependencies such as the compilers (as done in my change to the git workflow) is a prerequisite that varies from machine to machine. It can't go in tox.ini for that reason, though it works fine in the git workflow since it's handled before tox -e py runs.

Example: on my RHEL8 dev VM, I had to install the compilers like so:

yum install gcc gcc-c++ numactl-devel python3-devel

May need more discussion here...

Collaborator:

With this installation method for vLLM, we are somewhat missing the goal of where we want to go with this story.

Essentially, we want to be able to quickly test whether a newer version of vLLM works. We already have work going on in PR #67 to automatically figure out the next vLLM version update; the step after PR #67 is to automatically update the vLLM version and test that it works.

This work, in PR #72, is aimed at making that kind of testing feasible.

So, if we install the vLLM dependency with a hardcoded version here, it adds one more place where the vLLM version must be updated, and since tox.ini is a totally different file format, it adds more complexity to the system.

Additionally, even outside the use case mentioned above, having multiple places that manage the vLLM version makes it harder to use this test beyond the currently supported version, since we would need to change the vLLM version in multiple places every time, which becomes error-prone.

So we need to figure out a new approach for installing vLLM in CPU mode that leans as much as possible on the standard package definition in pyproject.toml.
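One possible direction, sketched here rather than proposed as-is: keep the pin in pyproject.toml and have tox read it. The helper below is hypothetical and assumes the pin appears as "vllm==X.Y.Z" in an optional-dependencies extra named vllm; adjust for wherever the pin actually lives.

# scripts/vllm_version.py (hypothetical helper; Python 3.11+ for tomllib)
import tomllib
from pathlib import Path

def pinned_vllm_version(pyproject: Path = Path("pyproject.toml")) -> str:
    """Return the X.Y.Z extracted from the vllm pin in pyproject.toml."""
    data = tomllib.loads(pyproject.read_text())
    deps = data["project"]["optional-dependencies"]["vllm"]  # assumed location
    pin = next(d for d in deps if d.replace(" ", "").startswith("vllm=="))
    return pin.split("==", 1)[1].strip()

if __name__ == "__main__":
    print(pinned_vllm_version())

The clone step in tox could then use something like git clone --branch "v$({envpython} scripts/vllm_version.py)" inside its sh -c call, so the version lives in exactly one place.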


# 2) Install Python build deps
{envpython} -m pip install --upgrade pip
{envpython} -m pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy
{envpython} -m pip install -v -r {envtmpdir}/vllm_source/requirements/cpu.txt \
--extra-index-url https://download.pytorch.org/whl/cpu

# 3) Build & install vLLM in CPU mode
sh -c "cd {envtmpdir}/vllm_source && VLLM_TARGET_DEVICE=cpu {envpython} setup.py install"

commands = pytest --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning
; -W ignore::DeprecationWarning


; Unclear: We probably want to test wheel packaging
; But! tox will fail when this is set and _any_ interpreter is missing
; Without this, sdist packaging is tested so that's a start.