
Commit bc9d7b5

[CI/Build] Split up Distributed Tests (vllm-project#25572)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Parent: fe6b19c

2 files changed (+28, -18 lines)

.buildkite/test-pipeline.yaml

Lines changed: 27 additions & 16 deletions
@@ -870,25 +870,27 @@ steps:
   - NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed'
   - python3 ../examples/offline_inference/data_parallel.py --dp-size=2 --tp-size=1 --node-size=2 --node-rank=1 --master-addr=192.168.10.10 --master-port=12345 --enforce-eager --trust-remote-code
 
-- label: Distributed Tests (2 GPUs) # 110min
-  timeout_in_minutes: 150
+- label: Distributed Tests (2 GPUs) # 68min
+  timeout_in_minutes: 90
   mirror_hardwares: [amdexperimental]
   working_dir: "/vllm-workspace/tests"
   num_gpus: 2
   source_file_dependencies:
+  - vllm/compilation/
   - vllm/distributed/
   - vllm/engine/
   - vllm/executor/
-  - vllm/model_executor/models/
-  - tests/distributed/
-  - vllm/compilation
   - vllm/worker/worker_base.py
-  - entrypoints/llm/test_collective_rpc.py
+  - vllm/v1/engine/
+  - vllm/v1/worker/
+  - tests/compile/test_basic_correctness.py
+  - tests/compile/test_wrapper.py
+  - tests/distributed/
+  - tests/entrypoints/llm/test_collective_rpc.py
   - tests/v1/test_async_llm_dp.py
   - tests/v1/test_external_lb_dp.py
   - tests/v1/entrypoints/openai/test_multi_api_servers.py
-  - vllm/v1/engine/
-  - vllm/v1/worker/
+  - tests/v1/shutdown
   - tests/v1/worker/test_worker_memory_snapshot.py
   commands:
   - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py
@@ -898,20 +900,29 @@ steps:
   - pytest -v -s ./compile/test_basic_correctness.py
   - pytest -v -s ./compile/test_wrapper.py
   - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
+  - pytest -v -s distributed/test_sequence_parallel.py
+  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
+  - pytest -v -s v1/worker/test_worker_memory_snapshot.py
+
+- label: Distributed Model Tests (2 GPUs) # 37min
+  timeout_in_minutes: 50
+  mirror_hardwares: [amdexperimental]
+  working_dir: "/vllm-workspace/tests"
+  num_gpus: 2
+  source_file_dependencies:
+  - vllm/model_executor/model_loader/sharded_state_loader.py
+  - vllm/model_executor/models/
+  - tests/basic_correctness/
+  - tests/model_executor/model_loader/test_sharded_state_loader.py
+  - tests/models/
+  commands:
   - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
+  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py
   # Avoid importing model tests that cause CUDA reinitialization error
   - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
   - pytest models/language -v -s -m 'distributed(num_gpus=2)'
   - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
   - VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
-  # test sequence parallel
-  - pytest -v -s distributed/test_sequence_parallel.py
-  # this test fails consistently.
-  # TODO: investigate and fix
-  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
-  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
-  - pytest -v -s models/multimodal/generation/test_maverick.py
-  - pytest -v -s v1/worker/test_worker_memory_snapshot.py
 
 - label: Plugin Tests (2 GPUs) # 40min
   timeout_in_minutes: 60
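
For readers who don't work in this file, here is a minimal sketch of the step format used above, assuming the conventions visible in the diff; the label and paths are illustrative, not part of the commit. As I read vLLM's pipeline generator, source_file_dependencies gates whether a step is scheduled for a given change, which is what makes the split pay off: each resulting step triggers on a narrower set of paths and carries a tighter timeout (90 and 50 minutes here, versus the old single 150-minute step).

# Minimal sketch of a test-pipeline step (illustrative label and paths,
# not taken from this commit)
- label: Example Distributed Tests (2 GPUs) # comment records measured runtime
  timeout_in_minutes: 30                    # hard limit for the whole step
  working_dir: "/vllm-workspace/tests"      # commands run from this directory
  num_gpus: 2                               # GPUs requested for the CI agent
  source_file_dependencies:                 # step runs only when these paths change
  - vllm/distributed/
  - tests/distributed/
  commands:
  - pytest -v -s distributed/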

tests/test_sharded_state_loader.py renamed to tests/model_executor/model_loader/test_sharded_state_loader.py

Lines changed: 1 addition & 2 deletions
@@ -91,8 +91,7 @@ def _run_generate(input_dir, queue: mp.Queue, **kwargs):
 @pytest.mark.parametrize("enable_lora", [False, True])
 @pytest.mark.parametrize("tp_size", [1, 2])
 def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available,
-                              llama_3p2_1b_files,
-                              monkeypatch: pytest.MonkeyPatch):
+                              llama_3p2_1b_files):
     if num_gpus_available < tp_size:
         pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")
9897
