From f574d71430aab7e56ebdfceeb3d2b261c1c553df Mon Sep 17 00:00:00 2001 From: Shonda-Adena-Witherspoon Date: Tue, 6 May 2025 14:50:43 -0500 Subject: [PATCH 1/4] added server boot test files and utils for cpu vllm Signed-off-by: Shonda-Adena-Witherspoon --- .github/workflows/build.yml | 7 +++ tests/conftest.py | 116 ++++++++++++++++++++++++++++++++++++ tests/test_http_server.py | 8 +++ tests/utils.py | 48 +++++++++++++++ tox.ini | 32 +++++++++- 5 files changed, 209 insertions(+), 2 deletions(-) create mode 100644 tests/conftest.py create mode 100644 tests/test_http_server.py create mode 100644 tests/utils.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e85249e..404827e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,6 +11,13 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + - name: Install system dependencies + run: | + sudo apt-get update -y + sudo apt-get install -y gcc-12 g++-12 libnuma-dev + sudo update-alternatives \ + --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 \ + --slave /usr/bin/g++ g++ /usr/bin/g++-12 - name: Set up Python 3.11 uses: actions/setup-python@v4 with: diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..232e885 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,116 @@ +""" +Pytest fixtures for spinning up a live vllm-detector-adapter HTTP server +""" + +# Future +from __future__ import annotations + +# Standard +from collections.abc import Generator +import argparse +import asyncio +import signal +import sys +import threading +import traceback + +# Third Party +from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args +from vllm.utils import FlexibleArgumentParser +import pytest +import requests + +# Local +from .utils import TaskFailedError, get_random_port, wait_until +from vllm_detector_adapter.api_server import add_chat_detection_params, run_server +from vllm_detector_adapter.utils import 
LocalEnvVarArgumentParser


@pytest.fixture(scope="session")
def http_server_port() -> int:
    """Port for the http server"""
    return get_random_port()


@pytest.fixture(scope="session")
def http_server_url(http_server_port: int) -> str:
    """Url for the http server"""
    return f"http://localhost:{http_server_port}"


@pytest.fixture
def args(monkeypatch, http_server_port: int) -> argparse.Namespace:
    """Mimic: python -m vllm_detector_adapter.api_server --model …"""
    # Use a 'tiny' model for test purposes
    model_name = "facebook/opt-125m"

    mock_argv = [
        "__main__.py",
        "--model",
        model_name,
        f"--port={http_server_port}",
        "--host=localhost",
        "--dtype=float32",
        "--device=cpu",
    ]
    monkeypatch.setattr(sys, "argv", mock_argv, raising=False)

    # Build parser like __main__ in api.server.py
    base_parser = FlexibleArgumentParser(description="vLLM server setup for pytest.")
    parser = LocalEnvVarArgumentParser(parser=make_arg_parser(base_parser))
    parser = add_chat_detection_params(parser)
    args = parser.parse_args()
    validate_parsed_serve_args(args)
    return args


@pytest.fixture
def _servers(
    args: argparse.Namespace,
    http_server_url: str,
    monkeypatch,
) -> Generator[None, None, None]:
    """Start server in background thread"""
    loop = asyncio.new_event_loop()
    task: asyncio.Task | None = None

    # Patch signal handling so child threads don’t touch the OS handler table
    monkeypatch.setattr(loop, "add_signal_handler", lambda *args, **kwargs: None)
    monkeypatch.setattr(signal, "signal", lambda *args, **kwargs: None)

    def target() -> None:
        nonlocal task
        task = loop.create_task(run_server(args))
        try:
            print("[conftest] starting run server...", flush=True)
            loop.run_until_complete(task)
        except Exception as e:
            print("[conftest] server failed to start:", e, flush=True)
            traceback.print_exc()
            raise
        finally:
            loop.close()

    t = threading.Thread(target=target, name="vllm-detector-server")
    t.start()

    
def _health() -> bool: + if task and task.done(): + raise TaskFailedError(task.exception()) + requests.get(f"{http_server_url}/health", timeout=1).raise_for_status() + return True + + try: + wait_until(_health, timeout=120.0, interval=1.0) + # tests execute with live server + yield + finally: + if task: + task.cancel() + t.join() + + +@pytest.fixture +def api_base_url(_servers, http_server_url: str) -> str: + """Pulls up the server and returns the URL to tests""" + return http_server_url diff --git a/tests/test_http_server.py b/tests/test_http_server.py new file mode 100644 index 0000000..1f885a3 --- /dev/null +++ b/tests/test_http_server.py @@ -0,0 +1,8 @@ +# Third Party +import requests + + +def test_startup(api_base_url): + """Smoke-test: test that the servers starts and health endpoint returns a 200 status code""" + r = requests.get(f"{api_base_url}/health", timeout=5) + assert r.status_code == 200 diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..b1129c3 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,48 @@ +"""Utility helpers shared by the test suite.""" + +# Future +from __future__ import annotations + +# Standard +from typing import Callable, TypeVar +import socket +import time + +__all__ = ["get_random_port", "wait_until", "TaskFailedError"] + +T = TypeVar("T") +Predicate = Callable[[], bool] + + +class TaskFailedError(RuntimeError): + """Raised when the background server task exits unexpectedly.""" + + +def get_random_port() -> int: + """Get an unused TCP port""" + with socket.socket() as s: + s.bind(("localhost", 0)) + return s.getsockname()[1] + + +def wait_until( + predicate: Predicate, + *, + timeout: float = 30.0, + interval: float = 0.5, +) -> None: + """ + Poll predicate until it returns True or timeout seconds elapse. 
+ """ + deadline = time.monotonic() + timeout + while True: + try: + if predicate(): + return + except Exception: + pass + + if time.monotonic() >= deadline: + raise TimeoutError("Timed out waiting for condition") + + time.sleep(interval) diff --git a/tox.ini b/tox.ini index 79447ad..e79242a 100644 --- a/tox.ini +++ b/tox.ini @@ -6,7 +6,6 @@ description = run tests with pytest with coverage extras = all dev-test - vllm passenv = LOG_LEVEL LOG_FILTERS @@ -15,10 +14,35 @@ passenv = LOG_CHANNEL_WIDTH setenv = DFTYPE = pandas_all + VLLM_LOGGING_LEVEL = DEBUG + VLLM_TARGET_DEVICE=cpu -commands = pytest --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning +allowlist_externals = + git + rm + sh + +# ── BEFORE running pytest, build & install vLLM v0.8.4 CPU-only from source ── +commands_pre = + # 1) clone exactly vLLM v0.8.4 + rm -rf {envtmpdir}/vllm_source + git clone --branch v0.8.4 \ + https://github.com/vllm-project/vllm.git {envtmpdir}/vllm_source + + # 2) install its Python build deps + {envpython} -m pip install --upgrade pip + {envpython} -m pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy + {envpython} -m pip install -v -r {envtmpdir}/vllm_source/requirements/cpu.txt \ + --extra-index-url https://download.pytorch.org/whl/cpu + + # 3) build & install vLLM in CPU mode + sh -c "cd {envtmpdir}/vllm_source && VLLM_TARGET_DEVICE=cpu {envpython} setup.py install" + #{envpython} -m pip install {envtmpdir}/vllm_source + +commands = pytest -s --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning ; -W ignore::DeprecationWarning + ; Unclear: We probably want to test wheel packaging ; But! 
tox will fail when this is set and _any_ interpreter is missing ; Without this, sdist packaging is tested so that's a start. @@ -34,4 +58,8 @@ allowlist_externals = ./scripts/fmt.sh description = lint with ruff extras = dev-fmt +allowlist_externals = + git + rm + sh commands = ruff check vllm_detector_adapter From 8e0b6dcd92e4eee199905628b7bd41c23e2917d4 Mon Sep 17 00:00:00 2001 From: Shonda-Adena-Witherspoon Date: Tue, 6 May 2025 15:17:06 -0500 Subject: [PATCH 2/4] updated tox.ini environments Signed-off-by: Shonda-Adena-Witherspoon --- tox.ini | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tox.ini b/tox.ini index e79242a..a29e2f5 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ envlist = py, lint, fmt [testenv] -description = run tests with pytest with coverage +description = shared defaults for test envs extras = all dev-test @@ -22,24 +22,25 @@ allowlist_externals = rm sh -# ── BEFORE running pytest, build & install vLLM v0.8.4 CPU-only from source ── +[testenv:py] +description = run tests with pytest with coverage +# BEFORE running pytest, build & install vLLM v0.8.4 CPU-only from source commands_pre = - # 1) clone exactly vLLM v0.8.4 + # 1) Clone vLLM v0.8.4 rm -rf {envtmpdir}/vllm_source git clone --branch v0.8.4 \ https://github.com/vllm-project/vllm.git {envtmpdir}/vllm_source - # 2) install its Python build deps + # 2) Install Python build deps {envpython} -m pip install --upgrade pip {envpython} -m pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy {envpython} -m pip install -v -r {envtmpdir}/vllm_source/requirements/cpu.txt \ --extra-index-url https://download.pytorch.org/whl/cpu - # 3) build & install vLLM in CPU mode + # 3) Build & install vLLM in CPU mode sh -c "cd {envtmpdir}/vllm_source && VLLM_TARGET_DEVICE=cpu {envpython} setup.py install" - #{envpython} -m pip install {envtmpdir}/vllm_source -commands = pytest -s --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} 
--cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning +commands = pytest --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning ; -W ignore::DeprecationWarning @@ -58,8 +59,4 @@ allowlist_externals = ./scripts/fmt.sh description = lint with ruff extras = dev-fmt -allowlist_externals = - git - rm - sh commands = ruff check vllm_detector_adapter From e991c29cca8e0e43534d0a37cf3df5e2bef4f3d7 Mon Sep 17 00:00:00 2001 From: swith004 Date: Tue, 6 May 2025 18:14:37 -0400 Subject: [PATCH 3/4] fix typo Co-authored-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com> Signed-off-by: swith004 --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 232e885..06bce6e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -55,7 +55,7 @@ def args(monkeypatch, http_server_port: int) -> argparse.Namespace: ] monkeypatch.setattr(sys, "argv", mock_argv, raising=False) - # Build parser like __main__ in api.server.py + # Build parser like __main__ in api_server.py base_parser = FlexibleArgumentParser(description="vLLM server setup for pytest.") parser = LocalEnvVarArgumentParser(parser=make_arg_parser(base_parser)) parser = add_chat_detection_params(parser) From 93eb532b7c6485c9b75a986c664238e9311db3d4 Mon Sep 17 00:00:00 2001 From: Shonda-Adena-Witherspoon Date: Tue, 6 May 2025 19:18:54 -0500 Subject: [PATCH 4/4] update tox envs and mock arg script name Signed-off-by: Shonda-Adena-Witherspoon --- tests/conftest.py | 2 +- tox.ini | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 06bce6e..2e045fb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -45,7 +45,7 @@ def args(monkeypatch, http_server_port: int) -> argparse.Namespace: model_name = 
"facebook/opt-125m" mock_argv = [ - "__main__.py", + "api_server.py", "--model", model_name, f"--port={http_server_port}", diff --git a/tox.ini b/tox.ini index a29e2f5..bed334d 100644 --- a/tox.ini +++ b/tox.ini @@ -14,8 +14,6 @@ passenv = LOG_CHANNEL_WIDTH setenv = DFTYPE = pandas_all - VLLM_LOGGING_LEVEL = DEBUG - VLLM_TARGET_DEVICE=cpu allowlist_externals = git