 from pathlib import Path
 
 import pytest
+import torch
 import yaml
 from click.testing import CliRunner
+from utils.cpp_paths import llm_root  # noqa: F401
 
 from tensorrt_llm.commands.bench import main
 
@@ -43,7 +45,7 @@ def timeout_handler(signum, frame):
 
 
 @pytest.fixture(scope="module")
-def shared_dataset(llm_root):
+def shared_dataset(llm_root):  # noqa: F811
     """Prepare dataset once for all tests in this module."""
     model_name = "meta-llama/Llama-3.1-8B"
     with tempfile.TemporaryDirectory() as temp_dir:
@@ -102,14 +104,15 @@ def _prepare_dataset(root_dir: str, temp_dir: str, model_path_or_name: str, num_
102104 "NCCL" ,
103105 ],
104106)
105- def test_allreduce_strategies (llm_root , shared_dataset , allreduce_strategy ):
107+ def test_allreduce_strategies (llm_root , shared_dataset , allreduce_strategy ): # noqa: F811
106108 """Test different allreduce strategies with multi-GPU configuration.
107109
108- This test validates that all allreduce strategies work correctly with TP=4 .
110+ This test validates that all allreduce strategies work correctly with TP=2 .
109111 Note: TWOSHOT strategy will automatically fall back to ONESHOT when sequence
110112 length is smaller than TP size during initialization.
111113
112114 Test has a 300 second timeout to prevent indefinite hangs.
115+ Test will be skipped if fewer than 2 GPUs are available.
113116
114117 Args:
115118 llm_root: Root directory fixture
@@ -120,10 +123,13 @@ def test_allreduce_strategies(llm_root, shared_dataset, allreduce_strategy):
     TEST_TIMEOUT_SECONDS = 300
 
     model_name = "meta-llama/Llama-3.1-8B"
-    tp_size = 4
+    tp_size = 2
     max_batch_size = 256
     max_num_tokens = 8192
 
+    if not torch.cuda.is_available() or torch.cuda.device_count() < tp_size:
+        pytest.skip(f"Allreduce strategy test requires at least {tp_size} GPUs, skipping")
+
     with tempfile.TemporaryDirectory() as temp_dir:
         # Write shared dataset to temp location
         dataset_path = Path(temp_dir, "synthetic_128_128.txt")
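
A note on the GPU-count guard introduced in the last hunk: the same check can also be hoisted out of the test body into a reusable `pytest.mark.skipif` marker. The sketch below is illustrative and not part of the patch; it assumes only `pytest` and `torch` are importable, and `test_allreduce_smoke` is a hypothetical stand-in test, not one from this repository.

```python
# Illustrative sketch only, not part of the patch above: the same GPU-count
# guard expressed as a reusable skipif marker. TP_SIZE mirrors tp_size = 2
# from the diff; test_allreduce_smoke is a hypothetical stand-in test.
import pytest
import torch

TP_SIZE = 2

requires_multi_gpu = pytest.mark.skipif(
    not torch.cuda.is_available() or torch.cuda.device_count() < TP_SIZE,
    reason=f"requires at least {TP_SIZE} GPUs",
)


@requires_multi_gpu
def test_allreduce_smoke():
    # With the marker applied, the body can assume TP_SIZE visible GPUs.
    assert torch.cuda.device_count() >= TP_SIZE
```

One trade-off worth noting: the marker evaluates its condition at collection time, while the in-body `pytest.skip` used in the patch defers the check until the test actually runs.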