Commit 31c1afb

Add Qwen3-Next benchmarks to CI
Signed-off-by: Tailing Yuan <yuantailing@gmail.com>
1 parent: d6899e4

3 files changed: +27, -2 lines

examples/layer_wise_benchmarks/README.md

Lines changed: 2 additions & 2 deletions
@@ -49,8 +49,8 @@ NP=4 ./mpi_launch.sh ./run_single.sh config_gen.yaml --scaled-from 16 --moe-back
 NP=4 ./mpi_launch.sh ./run_single.sh config_gen.yaml --scaled-from 16 --no-enable-attention-dp
 
 # Run Qwen3-Next (balanced routing is not implemented)
-NP=2 TRTLLM_ENABLE_PDL=1 ./mpi_launch.sh ./run_single.sh config_ctx.yaml --model Qwen/Qwen3-Next-80B-A3B-Instruct --layer-indices 6,7 --no-enable-attention-dp --moe-backend TRTLLM --balance-method NotModified
-NP=2 TRTLLM_ENABLE_PDL=1 ./mpi_launch.sh ./run_single.sh config_gen.yaml --model Qwen/Qwen3-Next-80B-A3B-Instruct --layer-indices 6,7 --no-enable-attention-dp --moe-backend TRTLLM --balance-method NotModified
+NP=2 TRTLLM_ENABLE_PDL=1 ./mpi_launch.sh ./run_single.sh config_ctx.yaml --model Qwen/Qwen3-Next-80B-A3B-Instruct --layer-indices 6,7 --no-enable-attention-dp --moe-backend TRTLLM --balance-method NotModified
+NP=2 TRTLLM_ENABLE_PDL=1 ./mpi_launch.sh ./run_single.sh config_gen.yaml --model Qwen/Qwen3-Next-80B-A3B-Instruct --layer-indices 6,7 --no-enable-attention-dp --moe-backend TRTLLM --balance-method NotModified
 
 # Run with DeepEP A2A
 NP=4 TRTLLM_FORCE_ALLTOALL_METHOD=DeepEP ./mpi_launch.sh ./run_single.sh config_ctx.yaml --moe-backend WIDEEP

tests/integration/test_lists/test-db/l0_b200.yml

Lines changed: 1 addition & 0 deletions
@@ -76,6 +76,7 @@ l0_b200:
 - unittest/_torch/modeling -k "modeling_llama"
 - unittest/_torch/modeling -k "modeling_mixtral"
 - unittest/_torch/modeling -k "modeling_gpt_oss"
+- unittest/tools/test_layer_wise_benchmarks.py::test_qwen3_next_gen_tep[1]
 # ------------- AutoDeploy tests ---------------
 - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-1]
 - unittest/_torch/auto_deploy/unit/singlegpu
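
The new CI entry pins the tp_size=1 variant of the test via the [1] node-ID suffix that pytest generates for the parametrization. A minimal local reproduction, assuming pytest is invoked from the repository's tests/ directory (as the test-db paths suggest) with a GPU and the model weights available:

    pytest "unittest/tools/test_layer_wise_benchmarks.py::test_qwen3_next_gen_tep[1]"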

tests/unittest/tools/test_layer_wise_benchmarks.py

Lines changed: 24 additions & 0 deletions
@@ -67,3 +67,27 @@ def test_deepseek_r1_gen_scaled_from_16_dep(llm_root):
             **os.environ,
             "NP": "4",
         })
+
+
+@pytest.mark.parametrize("tp_size", [1, 2, 4])
+def test_qwen3_next_gen_tep(llm_root, tp_size):
+    if torch.cuda.device_count() < tp_size:
+        pytest.skip(f"needs {tp_size:d} GPUs to run this test")
+    model_root = llm_models_root(check=True)
+    check_call([
+        "./mpi_launch.sh",
+        "./run_single.sh",
+        "config_gen.yaml",
+        "--model",
+        model_root / "Qwen3" / "Qwen3-Next-80B-A3B-Instruct",
+        "--layer-indices=6,7",
+        "--no-enable-attention-dp",
+        "--moe-backend=TRTLLM",
+        "--balance-method=NotModified",
+    ],
+        cwd=llm_root / "examples" / "layer_wise_benchmarks",
+        env={
+            **os.environ,
+            "NP": f"{tp_size:d}",
+            "TRTLLM_ENABLE_PDL": "1",
+        })
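
For reference, with tp_size=1 the test above effectively launches the same command the README documents, from examples/layer_wise_benchmarks. A sketch, assuming the models root resolved by llm_models_root() is exported here as a hypothetical LLM_MODELS_ROOT variable:

    cd examples/layer_wise_benchmarks
    NP=1 TRTLLM_ENABLE_PDL=1 ./mpi_launch.sh ./run_single.sh config_gen.yaml \
        --model "$LLM_MODELS_ROOT/Qwen3/Qwen3-Next-80B-A3B-Instruct" \
        --layer-indices=6,7 --no-enable-attention-dp \
        --moe-backend=TRTLLM --balance-method=NotModified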
