Add third draft of mm_fp4 backend -- no autotune

bkryu · bkryu · commit 80f76e0f5dab · 2025-10-31T18:09:37.000Z
diff --git a/benchmarks/routines/gemm.py b/benchmarks/routines/gemm.py
@@ -790,7 +790,7 @@ def testMmFp4(args):
     run_refcheck = args.refcheck
     use_128x4_sf_layout = args.use_128x4_sf_layout
     use_nvfp4 = args.use_nvfp4
-    autotune_supported_backends = ["cutlass", "trtllm"]
+    autotune_supported_backends = ["cutlass", "trtllm", "auto"]
     res = []
 
     backends = filter_backends_by_compute_capability(backends, args.routine, device)
diff --git a/flashinfer/gemm.py b/flashinfer/gemm.py
@@ -1887,7 +1887,6 @@ def _auto_gemm_fp4_requirement(
             checker, "is_compute_capability_supported"
         ) and checker.is_compute_capability_supported(cc_arch):
             # At least one backend is supported
-            print(f"Backend {candidate} is supported on this device.")
             return True
 
     # No backend is supported on this device
@@ -1994,8 +1993,9 @@ def mm_fp4(
     if backend == "auto":
         cuda_major, _ = get_cuda_version(a.device)
         cc_major, cc_minor = get_compute_capability(a.device)
-        # If cuda version is 13 or greater AND cudnn version is 9.X or greater, prioritize cudnn.
-        if cuda_major >= 13:  # to-do add cudnn version threshold
+        # If cuda version is 13 or greater:
+        # cudnn is more performant if cudnn version is 9.14 or greater.
+        if cuda_major >= 13 and cudnn.backend_version() >= 91400:
             candidate_backends = ("cudnn", "cutlass")
         # Otherwise, prioritize cutlass
         else:
@@ -2026,11 +2026,7 @@ def mm_fp4(
                 supported_backends.append(candidate)
             except Exception:
                 pass
-        print(f"Supported backends: {supported_backends}")
         selected_backend = supported_backends[0]
-        print(
-            f"Selected backend: {selected_backend} for cuda version {cuda_major} and compute capability {cc_major}{cc_minor}"
-        )
     else:
         selected_backend = backend
     if selected_backend == "cudnn":