[vllm][benchmarks] Remove one memory allocation (#5340)

Egor-Krivov · web-flow · commit c795abd768e0 · 2025-10-17T18:12:06.000+02:00
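Two changes:
1. Remove an unnecessary memory allocation: the batched MoE benchmark now calls `make_test_weight` to build only the one weight tensor it uses, instead of `make_test_weights`, whose second result was discarded (the size argument changes from `N // 2` to `N` accordingly).
2. Fix a bug where results were overwritten: the FP8 run now writes its transformed report to `moe-gemm-fp8-report.csv` instead of reusing `moe-gemm-report.csv`.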
diff --git a/.github/workflows/third-party-benchmarks.yml b/.github/workflows/third-party-benchmarks.yml
@@ -112,7 +112,7 @@ jobs:
 
           cd benchmarks/third_party/vllm
           FP8="1" python batched_moe_benchmark.py --reports $REPORTS
-          python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-report.csv --tag $TAG --benchmark moe-fp8-benchmark
+          python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-fp8-report.csv --tag $TAG --benchmark moe-fp8-benchmark
 
       - name: Run Liger-Kernel benchmarks
         if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'liger-kernel')) }}
diff --git a/benchmarks/third_party/vllm/batched_moe_benchmark.py b/benchmarks/third_party/vllm/batched_moe_benchmark.py
@@ -25,7 +25,7 @@
 from vllm.model_executor.layers.fused_moe.utils import normalize_batched_scales_shape
 
 # Import utility functions from vLLM tests
-from tests.kernels.moe.utils import make_quantized_test_activations, make_test_weights
+from tests.kernels.moe.utils import make_quantized_test_activations, make_test_weight
 from tests.kernels.quant_utils import native_batched_masked_quant_matmul
 
 
@@ -552,9 +552,9 @@ def benchmark(num_experts, max_tokens_per_expert, K, N, fp8, block_quant, provid
         )
 
         # Create test weights (only need B matrix for batched MM)
-        (B, B_q, B_scale, _), _ = make_test_weights(
+        B, B_q, B_scale, _ = make_test_weight(
             num_experts,
-            N // 2,
+            N,
             K,
             in_dtype=act_dtype,
             quant_dtype=quant_dtype,