Skip to content

Commit 7e499b5

Browse files
authored
test: bring back the framework 1 gpu pre-merge tests + clean up pytest markers (#4698)
Signed-off-by: PeaBrane <yanrpei@gmail.com>
1 parent 3cad926 commit 7e499b5

File tree

18 files changed: +78 / -179 lines changed

18 files changed: +78 / -179 lines changed

.github/workflows/container-validation-backends.yml

Lines changed: 9 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -179,23 +179,14 @@ jobs:
179179
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
180180
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
181181

182-
- name: Run unit tests
182+
- name: Run tests
183183
if: ${{ matrix.platform.arch != 'arm64' }}
184184
uses: ./.github/actions/pytest
185185
with:
186186
image_tag: ${{ steps.build-image.outputs.image_tag }}
187-
pytest_marks: "unit and vllm and gpu_1"
187+
pytest_marks: "pre_merge and vllm"
188188
framework: "vllm"
189-
test_type: "unit"
190-
platform_arch: ${{ matrix.platform.arch }}
191-
- name: Run e2e tests
192-
if: ${{ matrix.platform.arch != 'arm64' }}
193-
uses: ./.github/actions/pytest
194-
with:
195-
image_tag: ${{ steps.build-image.outputs.image_tag }}
196-
pytest_marks: "e2e and vllm and gpu_1 and not slow"
197-
framework: "vllm"
198-
test_type: "e2e, gpu_1"
189+
test_type: "pre_merge"
199190
platform_arch: ${{ matrix.platform.arch }}
200191

201192
sglang:
@@ -246,23 +237,14 @@ jobs:
246237
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
247238
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
248239

249-
- name: Run unit tests
250-
if: ${{ matrix.platform.arch != 'arm64' }}
251-
uses: ./.github/actions/pytest
252-
with:
253-
image_tag: ${{ steps.build-image.outputs.image_tag }}
254-
pytest_marks: "unit and sglang and gpu_1"
255-
framework: "sglang"
256-
test_type: "unit"
257-
platform_arch: ${{ matrix.platform.arch }}
258-
- name: Run e2e tests
240+
- name: Run tests
259241
if: ${{ matrix.platform.arch != 'arm64' }}
260242
uses: ./.github/actions/pytest
261243
with:
262244
image_tag: ${{ steps.build-image.outputs.image_tag }}
263-
pytest_marks: "e2e and sglang and gpu_1"
245+
pytest_marks: "pre_merge and sglang"
264246
framework: "sglang"
265-
test_type: "e2e, gpu_1"
247+
test_type: "pre_merge"
266248
platform_arch: ${{ matrix.platform.arch }}
267249

268250
trtllm:
@@ -313,23 +295,14 @@ jobs:
313295
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
314296
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
315297

316-
- name: Run unit tests
317-
if: ${{ matrix.platform.arch != 'arm64' }}
318-
uses: ./.github/actions/pytest
319-
with:
320-
image_tag: ${{ steps.build-image.outputs.image_tag }}
321-
pytest_marks: "unit and trtllm and gpu_1"
322-
framework: "trtllm"
323-
test_type: "unit"
324-
platform_arch: ${{ matrix.platform.arch }}
325-
- name: Run e2e tests
298+
- name: Run tests
326299
if: ${{ matrix.platform.arch != 'arm64' }}
327300
uses: ./.github/actions/pytest
328301
with:
329302
image_tag: ${{ steps.build-image.outputs.image_tag }}
330-
pytest_marks: "e2e and trtllm and gpu_1 and not slow"
303+
pytest_marks: "pre_merge and trtllm"
331304
framework: "trtllm"
332-
test_type: "e2e, gpu_1"
305+
test_type: "pre_merge"
333306
platform_arch: ${{ matrix.platform.arch }}
334307

335308
deploy-test-fault-tolerance:

.github/workflows/container-validation-dynamo.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ jobs:
6565
docker compose down
6666
- name: Run pytest (parallel tests with xdist)
6767
env:
68-
PYTEST_MARKS: "pre_merge and parallel"
68+
PYTEST_MARKS: "pre_merge and parallel and not (vllm or sglang or trtllm)"
6969
run: |
7070
docker run -w /workspace \
7171
--name ${{ env.CONTAINER_ID }}_pytest_parallel \
@@ -77,7 +77,7 @@ jobs:
7777
docker cp ${{ env.CONTAINER_ID }}_pytest_parallel:/workspace/${{ env.PYTEST_PARALLEL_XML_FILE }} . || echo "No parallel test report found"
7878
- name: Run pytest (sequential tests)
7979
env:
80-
PYTEST_MARKS: "(pre_merge and not parallel) or mypy"
80+
PYTEST_MARKS: "((pre_merge and not parallel) or mypy) and not (vllm or sglang or trtllm)"
8181
run: |
8282
docker run -w /workspace \
8383
--name ${{ env.CONTAINER_ID }}_pytest \

components/src/dynamo/common/utils/prometheus.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ def register_engine_metrics_callback(
5555
5656
# Include multiple metric prefixes
5757
register_engine_metrics_callback(
58-
generate_endpoint, REGISTRY, metric_prefix_filter=["vllm:", "lmcache:"]
58+
generate_endpoint, REGISTRY, metric_prefix_filters=["vllm:", "lmcache:"]
5959
)
6060
6161
# With filtering and prefixing for TensorRT-LLM

components/src/dynamo/sglang/tests/test_sglang_prometheus_utils.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
pytest.mark.unit,
1414
pytest.mark.sglang,
1515
pytest.mark.gpu_0,
16+
pytest.mark.pre_merge,
1617
pytest.mark.post_merge,
1718
]
1819

@@ -58,7 +59,7 @@ def test_sglang_use_case(self, sglang_registry):
5859
"""Test SGLang use case: filter to sglang: metrics and exclude python_/process_."""
5960
result = get_prometheus_expfmt(
6061
sglang_registry,
61-
metric_prefix_filter="sglang:",
62+
metric_prefix_filters=["sglang:"],
6263
exclude_prefixes=["python_", "process_"],
6364
)
6465

components/src/dynamo/trtllm/tests/test_trtllm_autodeploy.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
# `.github/workflows/container-validation-backends.yml` does not make use of
2020
# the `gpu_0` marker.
2121
pytest.mark.gpu_1,
22+
pytest.mark.pre_merge,
2223
]
2324
_PYTORCH_LLM_CLS_NAME = "dynamo.trtllm.engine.LLM"
2425
_AUTODEPLOY_LLM_CLS_NAME = "tensorrt_llm._torch.auto_deploy.LLM"

components/src/dynamo/trtllm/tests/test_trtllm_prometheus_utils.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
pytest.mark.unit,
1414
pytest.mark.trtllm,
1515
pytest.mark.gpu_0,
16+
pytest.mark.pre_merge,
1617
pytest.mark.post_merge,
1718
]
1819

components/src/dynamo/trtllm/tests/test_trtllm_unit.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
pytest.mark.unit,
2424
pytest.mark.trtllm,
2525
pytest.mark.gpu_1,
26+
pytest.mark.pre_merge,
2627
]
2728

2829

components/src/dynamo/vllm/tests/test_vllm_prometheus_utils.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
pytest.mark.unit,
1414
pytest.mark.vllm,
1515
pytest.mark.gpu_0,
16+
pytest.mark.pre_merge,
1617
pytest.mark.post_merge,
1718
]
1819

@@ -56,7 +57,7 @@ def test_vllm_use_case(self, vllm_registry):
5657
"""Test vLLM use case: filter to vllm: metrics and exclude python_/process_."""
5758
result = get_prometheus_expfmt(
5859
vllm_registry,
59-
metric_prefix_filter="vllm:",
60+
metric_prefix_filters=["vllm:"],
6061
exclude_prefixes=["python_", "process_"],
6162
)
6263

tests/README.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,8 @@ Markers are required for all tests. They are used for test selection in CI and l
7171
| Test Type [required] | unit, integration, e2e, benchmark, stress, multimodal | Nature of the test |
7272
| Hardware [required] | gpu_0, gpu_1, gpu_2, gpu_4, gpu_8, h100 | Number/type of GPUs required |
7373
| Component/Framework | vllm, trtllm, sglang, kvbm, planner, router | Backend or component specificity |
74-
| Other | slow, skip, xfail | Special handling |
74+
| Execution | parallel | Test can run in parallel with pytest-xdist |
75+
| Other | slow, skip, xfail, mypy, custom_build | Special handling |
7576

7677
### Example
7778
```python

tests/fault_tolerance/cancellation/test_trtllm.py

Lines changed: 7 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,13 @@
2121

2222
logger = logging.getLogger(__name__)
2323

24+
pytestmark = [
25+
pytest.mark.trtllm,
26+
pytest.mark.gpu_1,
27+
pytest.mark.e2e,
28+
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
29+
]
30+
2431

2532
class DynamoWorkerProcess(ManagedProcess):
2633
"""Process manager for Dynamo worker with TensorRT-LLM backend"""
@@ -127,10 +134,6 @@ def is_ready(self, response) -> bool:
127134
return False
128135

129136

130-
@pytest.mark.trtllm
131-
@pytest.mark.gpu_1
132-
@pytest.mark.e2e
133-
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
134137
@pytest.mark.nightly
135138
def test_request_cancellation_trtllm_aggregated(
136139
request, runtime_services, predownload_models
@@ -205,10 +208,6 @@ def test_request_cancellation_trtllm_aggregated(
205208
logger.info(f"{description} detected successfully")
206209

207210

208-
@pytest.mark.trtllm
209-
@pytest.mark.gpu_1
210-
@pytest.mark.e2e
211-
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
212211
@pytest.mark.nightly
213212
def test_request_cancellation_trtllm_decode_cancel(
214213
request, runtime_services, predownload_models
@@ -282,11 +281,7 @@ def test_request_cancellation_trtllm_decode_cancel(
282281
)
283282

284283

285-
@pytest.mark.trtllm
286-
@pytest.mark.gpu_1
287-
@pytest.mark.e2e
288284
@pytest.mark.nightly
289-
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
290285
def test_request_cancellation_trtllm_prefill_cancel(
291286
request, runtime_services, predownload_models
292287
):
@@ -369,10 +364,6 @@ def test_request_cancellation_trtllm_prefill_cancel(
369364
)
370365

371366

372-
@pytest.mark.trtllm
373-
@pytest.mark.gpu_1
374-
@pytest.mark.e2e
375-
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
376367
@pytest.mark.xfail(
377368
reason="May fail due to unknown reason with TRT-LLM or backend implementation",
378369
strict=False,

0 commit comments

Comments
 (0)