@@ -167,7 +167,7 @@ steps:
167167 - tests/distributed/test_utils
168168 - tests/distributed/test_pynccl
169169 - tests/distributed/test_events
170- - tests/compile/test_basic_correctness
170+ - tests/compile/fullgraph/test_basic_correctness.py
171171 - examples/offline_inference/rlhf.py
172172 - examples/offline_inference/rlhf_colocate.py
173173 - tests/examples/offline_inference/data_parallel.py
@@ -197,7 +197,7 @@ steps:
197197 - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_hybrid_lb_dp.py
198198 - pytest -v -s v1/engine/test_engine_core_client.py::test_kv_cache_events_dp
199199 - pytest -v -s distributed/test_utils.py
200- - pytest -v -s compile/test_basic_correctness.py
200+ - pytest -v -s compile/fullgraph/test_basic_correctness.py
201201 - pytest -v -s distributed/test_pynccl.py
202202 - pytest -v -s distributed/test_events.py
203203 - pytest -v -s distributed/test_symm_mem_allreduce.py
@@ -445,18 +445,12 @@ steps:
445445 - vllm/
446446 - tests/compile
447447 commands:
448- - pytest -v -s compile/test_graph_partition.py
449- - pytest -v -s compile/test_config.py
450- - pytest -v -s compile/test_pass_manager.py
451- - pytest -v -s compile/test_fusion.py
452- - pytest -v -s compile/test_fusion_attn.py
453- - pytest -v -s compile/test_functionalization.py
454- - pytest -v -s compile/test_silu_mul_quant_fusion.py
455- - pytest -v -s compile/test_fusion_all_reduce.py
456- - pytest -v -s compile/test_decorator.py
457- - pytest -v -s compile/test_noop_elimination.py
458- - pytest -v -s compile/test_aot_compile.py
459- - pytest -v -s compile/test_qk_norm_rope_fusion.py
448+ # Run unit tests defined directly under compile/,
449+ # not including subdirectories, which are usually heavier
450+ # tests covered elsewhere.
451+ # Use `find` to launch multiple instances of pytest so that
452+ # they do not suffer from https://github.com/vllm-project/vllm/issues/28965
453+ - " find compile/ -maxdepth 1 -name 'test_*.py' -exec pytest -s -v {} \\\\ ;"
460454
461455- label: PyTorch Fullgraph Smoke Test # 15min
462456 timeout_in_minutes: 30
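
The `find ... -exec pytest` command added in this hunk runs pytest once per matching test file instead of collecting everything in a single process, which is the workaround for https://github.com/vllm-project/vllm/issues/28965 referenced in the comment. A minimal sketch of what that invocation expands to, assuming two hypothetical files directly under compile/:

    # hypothetical top-level test files:
    #   compile/test_config.py
    #   compile/test_decorator.py
    # the '\;' terminator makes find run the command once per match:
    find compile/ -maxdepth 1 -name 'test_*.py' -exec pytest -s -v {} \;
    # ...which behaves roughly like:
    pytest -s -v compile/test_config.py
    pytest -s -v compile/test_decorator.py

The same pattern is reused for the fullgraph/ smoke tests in the next hunk, with `-not -name 'test_full_graph.py'` excluding the heavier full-graph test.
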
@@ -466,9 +460,11 @@ steps:
466460 - vllm/
467461 - tests/compile
468462 commands:
469- - pytest -v -s compile/test_basic_correctness.py
470- - pytest -v -s compile/test_multimodal_compile.py
471- - pytest -v -s compile/piecewise/
463+ # Run smoke tests under fullgraph directory, except test_full_graph.py
464+ # as it is a heavy test that is covered in other steps.
465+ # Use `find` to launch multiple instances of pytest so that
466+ # they do not suffer from https://github.com/vllm-project/vllm/issues/28965
467+ - " find compile/fullgraph/ -name 'test_*.py' -not -name 'test_full_graph.py' -exec pytest -s -v {} \\\\ ;"
472468
473469- label: PyTorch Fullgraph Test # 27min
474470 timeout_in_minutes: 40
@@ -479,10 +475,10 @@ steps:
479475 - tests/compile
480476 commands:
481477 # fp8 kv scales not supported on sm89, tested on Blackwell instead
482- - pytest -v -s compile/test_full_graph.py -k 'not test_fp8_kv_scale_compile'
478+ - pytest -v -s compile/fullgraph/test_full_graph.py -k 'not test_fp8_kv_scale_compile'
483479 # Limit to no custom ops to reduce running time
484480 # Wrap with quotes to escape yaml and avoid starting -k string with a -
485- - " pytest -v -s compile/test_fusions_e2e.py -k 'TRITON and not +quant_fp8 and not Llama-4'"
481+ - " pytest -v -s compile/distributed/ test_fusions_e2e.py -k 'TRITON and not +quant_fp8 and not Llama-4'"
486482
487483- label: Cudagraph test
488484 timeout_in_minutes: 20
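
The quoted `-k` expressions in these steps select parametrized cases whose test id contains the named substrings. A small illustration with hypothetical ids (the real parametrizations in test_fusions_e2e.py differ), using the expression from the Fullgraph Test step above:

    # hypothetical collected ids:
    #   test_fusions_e2e.py::test_fusion[TRITON-+rms_norm]      -> selected
    #   test_fusions_e2e.py::test_fusion[TRITON-+quant_fp8]     -> filtered out by 'not +quant_fp8'
    #   test_fusions_e2e.py::test_fusion[FLASHINFER-+rms_norm]  -> filtered out ('TRITON' not in id)
    pytest -v -s compile/distributed/test_fusions_e2e.py \
      -k 'TRITON and not +quant_fp8 and not Llama-4'

As the YAML comment notes, wrapping the whole command in quotes escapes the YAML and avoids a `-k` string that starts with a `-`.
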
@@ -939,17 +935,22 @@ steps:
939935 - vllm/model_executor/layers/layernorm.py
940936 - vllm/model_executor/layers/activation.py
941937 - vllm/model_executor/layers/quantization/input_quant_fp8.py
938+ - tests/compile/test_fusion_attn.py
939+ - tests/compile/test_silu_mul_quant_fusion.py
940+ - tests/compile/distributed/test_fusion_all_reduce.py
941+ - tests/compile/distributed/test_fusions_e2e.py
942+ - tests/compile/fullgraph/test_full_graph.py
942943 commands:
943944 - nvidia-smi
944945 - pytest -v -s tests/compile/test_fusion_attn.py
945946 - pytest -v -s tests/compile/test_silu_mul_quant_fusion.py
946947 # this runner has 2 GPUs available even though num_gpus=2 is not set
947- - pytest -v -s tests/compile/test_fusion_all_reduce.py
948+ - pytest -v -s tests/compile/distributed/test_fusion_all_reduce.py
948949 # Limit to Inductor partition, no custom ops, and allreduce & attn fusion to reduce running time
949950 # Wrap with quotes to escape yaml
950- - " pytest -v -s tests/compile/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm -k 'True and not +quant_fp8 and not +rms_norm'"
951+ - " pytest -v -s tests/compile/distributed/ test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm -k 'True and not +quant_fp8 and not +rms_norm'"
951952 # test_fp8_kv_scale_compile requires FlashAttention (not supported on default L4/L40)
952- - pytest -v -s tests/compile/test_full_graph.py::test_fp8_kv_scale_compile
953+ - pytest -v -s tests/compile/fullgraph/test_full_graph.py::test_fp8_kv_scale_compile
953954
954955- label: Blackwell Fusion E2E Tests # 30 min
955956 timeout_in_minutes: 40
@@ -966,12 +967,11 @@ steps:
966967 - vllm/model_executor/layers/layernorm.py
967968 - vllm/model_executor/layers/activation.py
968969 - vllm/model_executor/layers/quantization/input_quant_fp8.py
969- - tests/compile/test_fusions_e2e.py
970- - tests/compile/test_full_graph.py
970+ - tests/compile/distributed/test_fusions_e2e.py
971971 commands:
972972 - nvidia-smi
973973 # Run all e2e fusion tests
974- - pytest -v -s tests/compile/test_fusions_e2e.py
974+ - pytest -v -s tests/compile/distributed/test_fusions_e2e.py
975975
976976- label: Blackwell GPT-OSS Eval
977977 timeout_in_minutes: 60
@@ -1069,7 +1069,7 @@ steps:
10691069 - vllm/worker/worker_base.py
10701070 - vllm/v1/engine/
10711071 - vllm/v1/worker/
1072- - tests/compile/test_basic_correctness.py
1072+ - tests/compile/fullgraph/test_basic_correctness.py
10731073 - tests/compile/test_wrapper.py
10741074 - tests/distributed/
10751075 - tests/entrypoints/llm/test_collective_rpc.py
@@ -1084,7 +1084,7 @@ steps:
10841084 - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
10851085 - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
10861086 - pytest -v -s entrypoints/llm/test_collective_rpc.py
1087- - pytest -v -s ./compile/test_basic_correctness.py
1087+ - pytest -v -s ./compile/fullgraph/test_basic_correctness.py
10881088 - pytest -v -s ./compile/test_wrapper.py
10891089 - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
10901090 - VLLM_TEST_SAME_HOST=1 VLLM_TEST_WITH_DEFAULT_DEVICE_SET=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
@@ -1264,10 +1264,10 @@ steps:
12641264 working_dir: "/vllm-workspace/"
12651265 num_gpus: 2
12661266 commands:
1267- - pytest -v -s tests/compile/test_async_tp.py
1268- - pytest -v -s tests/compile/test_sequence_parallelism.py
1269- - pytest -v -s tests/compile/test_fusion_all_reduce.py
1270- - " pytest -v -s tests/compile/test_fusions_e2e.py -k 'not Llama-4'"
1267+ - pytest -v -s tests/compile/distributed/test_async_tp.py
1268+ - pytest -v -s tests/compile/distributed/test_sequence_parallelism.py
1269+ - pytest -v -s tests/compile/distributed/test_fusion_all_reduce.py
1270+ - "pytest -v -s tests/compile/distributed/test_fusions_e2e.py -k 'not Llama-4'"
12711271 - pytest -v -s tests/distributed/test_sequence_parallel.py
12721272 - pytest -v -s tests/distributed/test_context_parallel.py
12731273 - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
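
Taken together, the path changes in this diff assume a reorganized tests/compile/ layout roughly like the sketch below. Only files that appear in this diff are listed, and the split is inferred from the updated commands rather than an exhaustive listing of the directory:

    tests/compile/               # lightweight unit tests, run per-file via find -maxdepth 1
        test_config.py
        test_fusion_attn.py
        test_silu_mul_quant_fusion.py
        test_wrapper.py
        ...
    tests/compile/fullgraph/     # smoke and full-graph correctness tests
        test_basic_correctness.py
        test_full_graph.py
    tests/compile/distributed/   # multi-GPU fusion and parallelism tests
        test_async_tp.py
        test_sequence_parallelism.py
        test_fusion_all_reduce.py
        test_fusions_e2e.py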