test: bring back the framework 1 gpu pre-merge tests + clean up pytest markers (#4698)

PeaBrane · web-flow · commit 7e499b5c460f · 2025-12-02T17:12:10.000-08:00
Signed-off-by: PeaBrane <yanrpei@gmail.com>
diff --git a/.github/workflows/container-validation-backends.yml b/.github/workflows/container-validation-backends.yml
@@ -179,23 +179,14 @@ jobs:
           azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
           azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
 
-      - name: Run unit tests
+      - name: Run tests
         if: ${{ matrix.platform.arch != 'arm64' }}
         uses: ./.github/actions/pytest
         with:
           image_tag: ${{ steps.build-image.outputs.image_tag }}
-          pytest_marks: "unit and vllm and gpu_1"
+          pytest_marks: "pre_merge and vllm"
           framework: "vllm"
-          test_type: "unit"
-          platform_arch: ${{ matrix.platform.arch }}
-      - name: Run e2e tests
-        if: ${{ matrix.platform.arch != 'arm64' }}
-        uses: ./.github/actions/pytest
-        with:
-          image_tag: ${{ steps.build-image.outputs.image_tag }}
-          pytest_marks: "e2e and vllm and gpu_1 and not slow"
-          framework: "vllm"
-          test_type: "e2e, gpu_1"
+          test_type: "pre_merge"
           platform_arch: ${{ matrix.platform.arch }}
 
   sglang:
@@ -246,23 +237,14 @@ jobs:
           azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
           azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
 
-      - name: Run unit tests
-        if: ${{ matrix.platform.arch != 'arm64' }}
-        uses: ./.github/actions/pytest
-        with:
-          image_tag: ${{ steps.build-image.outputs.image_tag }}
-          pytest_marks: "unit and sglang and gpu_1"
-          framework: "sglang"
-          test_type: "unit"
-          platform_arch: ${{ matrix.platform.arch }}
-      - name: Run e2e tests
+      - name: Run tests
         if: ${{ matrix.platform.arch != 'arm64' }}
         uses: ./.github/actions/pytest
         with:
           image_tag: ${{ steps.build-image.outputs.image_tag }}
-          pytest_marks: "e2e and sglang and gpu_1"
+          pytest_marks: "pre_merge and sglang"
           framework: "sglang"
-          test_type: "e2e, gpu_1"
+          test_type: "pre_merge"
           platform_arch: ${{ matrix.platform.arch }}
 
   trtllm:
@@ -313,23 +295,14 @@ jobs:
           azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
           azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
 
-      - name: Run unit tests
-        if: ${{ matrix.platform.arch != 'arm64' }}
-        uses: ./.github/actions/pytest
-        with:
-          image_tag: ${{ steps.build-image.outputs.image_tag }}
-          pytest_marks: "unit and trtllm and gpu_1"
-          framework: "trtllm"
-          test_type: "unit"
-          platform_arch: ${{ matrix.platform.arch }}
-      - name: Run e2e tests
+      - name: Run tests
         if: ${{ matrix.platform.arch != 'arm64' }}
         uses: ./.github/actions/pytest
         with:
           image_tag: ${{ steps.build-image.outputs.image_tag }}
-          pytest_marks: "e2e and trtllm and gpu_1 and not slow"
+          pytest_marks: "pre_merge and trtllm"
           framework: "trtllm"
-          test_type: "e2e, gpu_1"
+          test_type: "pre_merge"
           platform_arch: ${{ matrix.platform.arch }}
 
   deploy-test-fault-tolerance:
diff --git a/.github/workflows/container-validation-dynamo.yml b/.github/workflows/container-validation-dynamo.yml
@@ -65,7 +65,7 @@ jobs:
           docker compose down
       - name: Run pytest (parallel tests with xdist)
         env:
-          PYTEST_MARKS: "pre_merge and parallel"
+          PYTEST_MARKS: "pre_merge and parallel and not (vllm or sglang or trtllm)"
         run: |
           docker run -w /workspace \
             --name ${{ env.CONTAINER_ID }}_pytest_parallel \
@@ -77,7 +77,7 @@ jobs:
           docker cp ${{ env.CONTAINER_ID }}_pytest_parallel:/workspace/${{ env.PYTEST_PARALLEL_XML_FILE }} . || echo "No parallel test report found"
       - name: Run pytest (sequential tests)
         env:
-          PYTEST_MARKS: "(pre_merge and not parallel) or mypy"
+          PYTEST_MARKS: "((pre_merge and not parallel) or mypy) and not (vllm or sglang or trtllm)"
         run: |
           docker run -w /workspace \
             --name ${{ env.CONTAINER_ID }}_pytest \
diff --git a/components/src/dynamo/common/utils/prometheus.py b/components/src/dynamo/common/utils/prometheus.py
@@ -55,7 +55,7 @@ def register_engine_metrics_callback(
 
         # Include multiple metric prefixes
         register_engine_metrics_callback(
-            generate_endpoint, REGISTRY, metric_prefix_filter=["vllm:", "lmcache:"]
+            generate_endpoint, REGISTRY, metric_prefix_filters=["vllm:", "lmcache:"]
         )
 
         # With filtering and prefixing for TensorRT-LLM
diff --git a/components/src/dynamo/sglang/tests/test_sglang_prometheus_utils.py b/components/src/dynamo/sglang/tests/test_sglang_prometheus_utils.py
@@ -13,6 +13,7 @@
     pytest.mark.unit,
     pytest.mark.sglang,
     pytest.mark.gpu_0,
+    pytest.mark.pre_merge,
     pytest.mark.post_merge,
 ]
 
@@ -58,7 +59,7 @@ def test_sglang_use_case(self, sglang_registry):
         """Test SGLang use case: filter to sglang: metrics and exclude python_/process_."""
         result = get_prometheus_expfmt(
             sglang_registry,
-            metric_prefix_filter="sglang:",
+            metric_prefix_filters=["sglang:"],
             exclude_prefixes=["python_", "process_"],
         )
 
diff --git a/components/src/dynamo/trtllm/tests/test_trtllm_autodeploy.py b/components/src/dynamo/trtllm/tests/test_trtllm_autodeploy.py
@@ -19,6 +19,7 @@
     # `.github/workflows/container-validation-backends.yml` does not make use of
     # the `gpu_0` marker.
     pytest.mark.gpu_1,
+    pytest.mark.pre_merge,
 ]
 _PYTORCH_LLM_CLS_NAME = "dynamo.trtllm.engine.LLM"
 _AUTODEPLOY_LLM_CLS_NAME = "tensorrt_llm._torch.auto_deploy.LLM"
diff --git a/components/src/dynamo/trtllm/tests/test_trtllm_prometheus_utils.py b/components/src/dynamo/trtllm/tests/test_trtllm_prometheus_utils.py
@@ -13,6 +13,7 @@
     pytest.mark.unit,
     pytest.mark.trtllm,
     pytest.mark.gpu_0,
+    pytest.mark.pre_merge,
     pytest.mark.post_merge,
 ]
 
diff --git a/components/src/dynamo/trtllm/tests/test_trtllm_unit.py b/components/src/dynamo/trtllm/tests/test_trtllm_unit.py
@@ -23,6 +23,7 @@
     pytest.mark.unit,
     pytest.mark.trtllm,
     pytest.mark.gpu_1,
+    pytest.mark.pre_merge,
 ]
 
 
diff --git a/components/src/dynamo/vllm/tests/test_vllm_prometheus_utils.py b/components/src/dynamo/vllm/tests/test_vllm_prometheus_utils.py
@@ -13,6 +13,7 @@
     pytest.mark.unit,
     pytest.mark.vllm,
     pytest.mark.gpu_0,
+    pytest.mark.pre_merge,
     pytest.mark.post_merge,
 ]
 
@@ -56,7 +57,7 @@ def test_vllm_use_case(self, vllm_registry):
         """Test vLLM use case: filter to vllm: metrics and exclude python_/process_."""
         result = get_prometheus_expfmt(
             vllm_registry,
-            metric_prefix_filter="vllm:",
+            metric_prefix_filters=["vllm:"],
             exclude_prefixes=["python_", "process_"],
         )
 
diff --git a/tests/README.md b/tests/README.md
@@ -71,7 +71,8 @@ Markers are required for all tests. They are used for test selection in CI and l
 | Test Type [required]    | unit, integration, e2e, benchmark, stress, multimodal   | Nature of the test                 |
 | Hardware [required]     | gpu_0, gpu_1, gpu_2,  gpu_4, gpu_8, h100      | Number/type of GPUs required       |
 | Component/Framework     | vllm, trtllm, sglang, kvbm, planner, router    | Backend or component specificity   |
-| Other                   | slow, skip, xfail        | Special handling                   |
+| Execution               | parallel                 | Test can run in parallel with pytest-xdist |
+| Other                   | slow, skip, xfail, mypy, custom_build        | Special handling                   |
 
 ### Example
 ```python
diff --git a/tests/fault_tolerance/cancellation/test_trtllm.py b/tests/fault_tolerance/cancellation/test_trtllm.py
@@ -21,6 +21,13 @@
 
 logger = logging.getLogger(__name__)
 
+pytestmark = [
+    pytest.mark.trtllm,
+    pytest.mark.gpu_1,
+    pytest.mark.e2e,
+    pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
+]
+
 
 class DynamoWorkerProcess(ManagedProcess):
     """Process manager for Dynamo worker with TensorRT-LLM backend"""
@@ -127,10 +134,6 @@ def is_ready(self, response) -> bool:
         return False
 
 
-@pytest.mark.trtllm
-@pytest.mark.gpu_1
-@pytest.mark.e2e
-@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
 @pytest.mark.nightly
 def test_request_cancellation_trtllm_aggregated(
     request, runtime_services, predownload_models
@@ -205,10 +208,6 @@ def test_request_cancellation_trtllm_aggregated(
                 logger.info(f"{description} detected successfully")
 
 
-@pytest.mark.trtllm
-@pytest.mark.gpu_1
-@pytest.mark.e2e
-@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
 @pytest.mark.nightly
 def test_request_cancellation_trtllm_decode_cancel(
     request, runtime_services, predownload_models
@@ -282,11 +281,7 @@ def test_request_cancellation_trtllm_decode_cancel(
                 )
 
 
-@pytest.mark.trtllm
-@pytest.mark.gpu_1
-@pytest.mark.e2e
 @pytest.mark.nightly
-@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
 def test_request_cancellation_trtllm_prefill_cancel(
     request, runtime_services, predownload_models
 ):
@@ -369,10 +364,6 @@ def test_request_cancellation_trtllm_prefill_cancel(
                 )
 
 
-@pytest.mark.trtllm
-@pytest.mark.gpu_1
-@pytest.mark.e2e
-@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
 @pytest.mark.xfail(
     reason="May fail due to unknown reason with TRT-LLM or backend implementation",
     strict=False,
diff --git a/tests/fault_tolerance/migration/test_vllm.py b/tests/fault_tolerance/migration/test_vllm.py
@@ -23,6 +23,14 @@
 
 logger = logging.getLogger(__name__)
 
+pytestmark = [
+    pytest.mark.vllm,
+    pytest.mark.gpu_1,
+    pytest.mark.e2e,
+    pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
+    pytest.mark.nightly,
+]
+
 
 class DynamoWorkerProcess(ManagedProcess):
     """Process manager for Dynamo worker with vLLM backend"""
@@ -100,11 +108,6 @@ def is_ready(self, response) -> bool:
         return False
 
 
-@pytest.mark.vllm
-@pytest.mark.gpu_1
-@pytest.mark.e2e
-@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
-@pytest.mark.nightly
 def test_request_migration_vllm_worker_failure(
     request, runtime_services, predownload_models, set_ucx_tls_no_mm
 ):
@@ -151,11 +154,6 @@ def test_request_migration_vllm_worker_failure(
                 verify_migration_occurred(frontend)
 
 
-@pytest.mark.vllm
-@pytest.mark.gpu_1
-@pytest.mark.e2e
-@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
-@pytest.mark.nightly
 def test_request_migration_vllm_graceful_shutdown(
     request, runtime_services, predownload_models, set_ucx_tls_no_mm
 ):
@@ -203,11 +201,6 @@ def test_request_migration_vllm_graceful_shutdown(
                 verify_migration_occurred(frontend)
 
 
-@pytest.mark.vllm
-@pytest.mark.gpu_1
-@pytest.mark.e2e
-@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
-@pytest.mark.nightly
 def test_no_request_migration_vllm_worker_failure(
     request, runtime_services, predownload_models, set_ucx_tls_no_mm
 ):
@@ -268,11 +261,6 @@ def test_no_request_migration_vllm_worker_failure(
                     ), f"Unexpected migration message: {e}"
 
 
-@pytest.mark.vllm
-@pytest.mark.gpu_1
-@pytest.mark.e2e
-@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
-@pytest.mark.nightly
 def test_no_request_migration_vllm_graceful_shutdown(
     request, runtime_services, predownload_models, set_ucx_tls_no_mm
 ):
diff --git a/tests/frontend/test_completion_mocker_engine.py b/tests/frontend/test_completion_mocker_engine.py
@@ -22,6 +22,13 @@
 
 TEST_MODEL = QWEN
 
+pytestmark = [
+    pytest.mark.e2e,
+    pytest.mark.gpu_1,
+    pytest.mark.post_merge,
+    pytest.mark.model(TEST_MODEL),
+]
+
 
 class DynamoFrontendProcess(ManagedProcess):
     """Process manager for Dynamo frontend"""
@@ -145,10 +152,6 @@ def start_services(request, runtime_services):
 
 
 @pytest.mark.usefixtures("start_services")
-@pytest.mark.e2e
-@pytest.mark.gpu_1
-@pytest.mark.post_merge
-@pytest.mark.model(TEST_MODEL)
 def test_completion_string_prompt() -> None:
     payload: Dict[str, Any] = {
         "model": TEST_MODEL,
@@ -165,10 +168,6 @@ def test_completion_string_prompt() -> None:
 
 
 @pytest.mark.usefixtures("start_services")
-@pytest.mark.e2e
-@pytest.mark.gpu_1
-@pytest.mark.post_merge
-@pytest.mark.model(TEST_MODEL)
 def test_completion_empty_array_prompt() -> None:
     payload: Dict[str, Any] = {
         "model": TEST_MODEL,
@@ -185,10 +184,6 @@ def test_completion_empty_array_prompt() -> None:
 
 
 @pytest.mark.usefixtures("start_services")
-@pytest.mark.e2e
-@pytest.mark.gpu_1
-@pytest.mark.post_merge
-@pytest.mark.model(TEST_MODEL)
 def test_completion_single_element_array_prompt() -> None:
     payload: Dict[str, Any] = {
         "model": TEST_MODEL,
@@ -205,10 +200,6 @@ def test_completion_single_element_array_prompt() -> None:
 
 
 @pytest.mark.usefixtures("start_services")
-@pytest.mark.e2e
-@pytest.mark.gpu_1
-@pytest.mark.post_merge
-@pytest.mark.model(TEST_MODEL)
 def test_completion_multi_element_array_prompt() -> None:
     payload: Dict[str, Any] = {
         "model": TEST_MODEL,
diff --git a/tests/frontend/test_vllm.py b/tests/frontend/test_vllm.py
diff --git a/tests/router/test_router_e2e_with_mockers.py b/tests/router/test_router_e2e_with_mockers.py
diff --git a/tests/router/test_router_e2e_with_vllm.py b/tests/router/test_router_e2e_with_vllm.py
diff --git a/tests/serve/test_sglang.py b/tests/serve/test_sglang.py
diff --git a/tests/serve/test_trtllm.py b/tests/serve/test_trtllm.py
diff --git a/tests/serve/test_vllm.py b/tests/serve/test_vllm.py

Original file line number	Diff line number	Diff line change
`@@ -55,7 +55,7 @@ def register_engine_metrics_callback(`
`55`	`55`
`56`	`56`	`# Include multiple metric prefixes`
`57`	`57`	`register_engine_metrics_callback(`
`58`		`- generate_endpoint, REGISTRY, metric_prefix_filter=["vllm:", "lmcache:"]`
	`58`	`+ generate_endpoint, REGISTRY, metric_prefix_filters=["vllm:", "lmcache:"]`
`59`	`59`	`)`
`60`	`60`
`61`	`61`	`# With filtering and prefixing for TensorRT-LLM`
Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,7 @@`
`13`	`13`	`pytest.mark.unit,`
`14`	`14`	`pytest.mark.sglang,`
`15`	`15`	`pytest.mark.gpu_0,`
	`16`	`+ pytest.mark.pre_merge,`
`16`	`17`	`pytest.mark.post_merge,`
`17`	`18`	`]`
`18`	`19`
`@@ -58,7 +59,7 @@ def test_sglang_use_case(self, sglang_registry):`
`58`	`59`	`"""Test SGLang use case: filter to sglang: metrics and exclude python_/process_."""`
`59`	`60`	`result = get_prometheus_expfmt(`
`60`	`61`	`sglang_registry,`
`61`		`- metric_prefix_filter="sglang:",`
	`62`	`+ metric_prefix_filters=["sglang:"],`
`62`	`63`	`exclude_prefixes=["python_", "process_"],`
`63`	`64`	`)`
`64`	`65`
Original file line number	Diff line number	Diff line change
`@@ -19,6 +19,7 @@`
`19`	`19`	`` # `.github/workflows/container-validation-backends.yml` does not make use of ``
`20`	`20`	`` # the `gpu_0` marker. ``
`21`	`21`	`pytest.mark.gpu_1,`
	`22`	`+ pytest.mark.pre_merge,`
`22`	`23`	`]`
`23`	`24`	`_PYTORCH_LLM_CLS_NAME = "dynamo.trtllm.engine.LLM"`
`24`	`25`	`_AUTODEPLOY_LLM_CLS_NAME = "tensorrt_llm._torch.auto_deploy.LLM"`
Original file line number	Diff line number	Diff line change
`@@ -23,6 +23,7 @@`
`23`	`23`	`pytest.mark.unit,`
`24`	`24`	`pytest.mark.trtllm,`
`25`	`25`	`pytest.mark.gpu_1,`
	`26`	`+ pytest.mark.pre_merge,`
`26`	`27`	`]`
`27`	`28`
`28`	`29`