Commit 31c1afb

Add Qwen3-Next benchmarks to CI
Signed-off-by: Tailing Yuan <yuantailing@gmail.com>
1 parent: d6899e4

3 files changed: +27, -2 lines

examples/layer_wise_benchmarks/README.md

Lines changed: 2 additions & 2 deletions
@@ -49,8 +49,8 @@ NP=4 ./mpi_launch.sh ./run_single.sh config_gen.yaml --scaled-from 16 --moe-back
 NP=4 ./mpi_launch.sh ./run_single.sh config_gen.yaml --scaled-from 16 --no-enable-attention-dp
 
 # Run Qwen3-Next (balanced routing is not implemented)
-NP=2 TRTLLM_ENABLE_PDL=1 ./mpi_launch.sh ./run_single.sh config_ctx.yaml --model Qwen/Qwen3-Next-80B-A3B-Instruct --layer-indices 6,7 --no-enable-attention-dp --moe-backend TRTLLM --balance-method NotModified
-NP=2 TRTLLM_ENABLE_PDL=1 ./mpi_launch.sh ./run_single.sh config_gen.yaml --model Qwen/Qwen3-Next-80B-A3B-Instruct --layer-indices 6,7 --no-enable-attention-dp --moe-backend TRTLLM --balance-method NotModified
+NP=2 TRTLLM_ENABLE_PDL=1 ./mpi_launch.sh ./run_single.sh config_ctx.yaml --model Qwen/Qwen3-Next-80B-A3B-Instruct --layer-indices 6,7 --no-enable-attention-dp --moe-backend TRTLLM --balance-method NotModified
+NP=2 TRTLLM_ENABLE_PDL=1 ./mpi_launch.sh ./run_single.sh config_gen.yaml --model Qwen/Qwen3-Next-80B-A3B-Instruct --layer-indices 6,7 --no-enable-attention-dp --moe-backend TRTLLM --balance-method NotModified
 
 # Run with DeepEP A2A
 NP=4 TRTLLM_FORCE_ALLTOALL_METHOD=DeepEP ./mpi_launch.sh ./run_single.sh config_ctx.yaml --moe-backend WIDEEP

tests/integration/test_lists/test-db/l0_b200.yml

Lines changed: 1 addition & 0 deletions
@@ -76,6 +76,7 @@ l0_b200:
 - unittest/_torch/modeling -k "modeling_llama"
 - unittest/_torch/modeling -k "modeling_mixtral"
 - unittest/_torch/modeling -k "modeling_gpt_oss"
+- unittest/tools/test_layer_wise_benchmarks.py::test_qwen3_next_gen_tep[1]
 # ------------- AutoDeploy tests ---------------
 - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-1]
 - unittest/_torch/auto_deploy/unit/singlegpu
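
The new CI entry pins the tp_size=1 variant of the test via the [1] node-ID suffix that pytest generates for the parametrization. A minimal local reproduction, assuming pytest is invoked from the repository's tests/ directory (as the test-db paths suggest) with a GPU and the model weights available:

    pytest "unittest/tools/test_layer_wise_benchmarks.py::test_qwen3_next_gen_tep[1]"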

tests/unittest/tools/test_layer_wise_benchmarks.py

Lines changed: 24 additions & 0 deletions
@@ -67,3 +67,27 @@ def test_deepseek_r1_gen_scaled_from_16_dep(llm_root):
             **os.environ,
             "NP": "4",
         })
+
+
+@pytest.mark.parametrize("tp_size", [1, 2, 4])
+def test_qwen3_next_gen_tep(llm_root, tp_size):
+    if torch.cuda.device_count() < tp_size:
+        pytest.skip(f"needs {tp_size:d} GPUs to run this test")
+    model_root = llm_models_root(check=True)
+    check_call([
+        "./mpi_launch.sh",
+        "./run_single.sh",
+        "config_gen.yaml",
+        "--model",
+        model_root / "Qwen3" / "Qwen3-Next-80B-A3B-Instruct",
+        "--layer-indices=6,7",
+        "--no-enable-attention-dp",
+        "--moe-backend=TRTLLM",
+        "--balance-method=NotModified",
+    ],
+        cwd=llm_root / "examples" / "layer_wise_benchmarks",
+        env={
+            **os.environ,
+            "NP": f"{tp_size:d}",
+            "TRTLLM_ENABLE_PDL": "1",
+        })
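
For reference, with tp_size=1 the test above effectively launches the same command the README documents, from examples/layer_wise_benchmarks. A sketch, assuming the models root resolved by llm_models_root() is exported here as a hypothetical LLM_MODELS_ROOT variable:

    cd examples/layer_wise_benchmarks
    NP=1 TRTLLM_ENABLE_PDL=1 ./mpi_launch.sh ./run_single.sh config_gen.yaml \
        --model "$LLM_MODELS_ROOT/Qwen3/Qwen3-Next-80B-A3B-Instruct" \
        --layer-indices=6,7 --no-enable-attention-dp \
        --moe-backend=TRTLLM --balance-method=NotModified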
