Skip to content

Commit 13595c5

Browse files
authored
fix mxfp8 matmul benchmark (#3221)
Summary: Adds padding to the scales to properly support shapes where M % 128 != 0. Test Plan: ``` python benchmarks/float8/bench_matmul.py --shape_gen_name custom --recipe mxfp8_cublas --M 17 --K 32 --N 16 ``` Reviewers: Subscribers: Tasks: Tags:
1 parent 82ae011 commit 13595c5

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

benchmarks/float8/bench_matmul.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from torchao.ops import mx_fp4_bf16
1919
from torchao.prototype.mx_formats.mx_tensor import to_mx
20+
from torchao.prototype.mx_formats.utils import to_blocked
2021
from torchao.testing.training.roofline_utils import get_specs
2122
from torchao.utils import is_MI300
2223

@@ -125,10 +126,16 @@ def run(
125126
elif recipe in ("mxfp8_cublas", "mxfp4_cutlass"):
126127
scale_a = torch.ones(M, K // 32, device=device, dtype=torch.float8_e8m0fnu)
127128
scale_b = torch.ones(N, K // 32, device=device, dtype=torch.float8_e8m0fnu)
129+
# pad if needed
130+
scale_a = to_blocked(scale_a)
131+
scale_b = to_blocked(scale_b)
128132
elif recipe == "nvfp4":
129133
# Use the blockwise scales from nvfp4_quantize
130134
scale_a = A_scales.view(torch.float8_e4m3fn)
131135
scale_b = B_scales.view(torch.float8_e4m3fn)
136+
# pad if needed
137+
scale_a = to_blocked(scale_a)
138+
scale_b = to_blocked(scale_b)
132139
else:
133140
assert False, f"unknown recipe {recipe}"
134141

0 commit comments

Comments (0)