Only convert to int4 preshuffled tensor on H100 (#3245)

jerryzh168 · web-flow · commit f303f4c51b08 · 2025-10-24T23:01:13.000-07:00
Summary:
A minor fix for `convert_to_packed_tensor_based_on_current_hardware` so that it only
converts an Int4Tensor to an Int4PreshuffledTensor when running on an H100 GPU
(the conversion is now additionally gated on `is_sm_at_least_90()`).

Test Plan:
pytest test/prototype/test_tensor_conversion.py

Reviewers:

Subscribers:

Tasks:

Tags:
diff --git a/test/prototype/test_tensor_conversion.py b/test/prototype/test_tensor_conversion.py
@@ -34,7 +34,10 @@
     _is_kernel_library_loaded,
 )
 from torchao.quantization.utils import compute_error
-from torchao.utils import _is_fbgemm_gpu_genai_available
+from torchao.utils import (
+    _is_fbgemm_gpu_genai_available,
+    is_sm_at_least_90,
+)
 
 
 class ToyLinearModelWithTiedEmbedding(torch.nn.Module):
@@ -206,5 +209,9 @@ def test_int4_tensor_conversion():
         convert_to_packed_tensor_based_on_current_hardware(weight), requires_grad=False
     )
     after_conversion = m(*example_inputs)
-    assert isinstance(m[0].weight, Int4PreshuffledTensor)
+    if is_sm_at_least_90():
+        assert isinstance(m[0].weight, Int4PreshuffledTensor)
+    else:
+        assert isinstance(m[0].weight, Int4Tensor)
+
     assert torch.equal(before_conversion, after_conversion)
diff --git a/torchao/prototype/tensor_conversion/api.py b/torchao/prototype/tensor_conversion/api.py
@@ -14,7 +14,11 @@
     Int4Tensor,
     IntxUnpackedToInt8Tensor,
 )
-from torchao.utils import TorchAOBaseTensor, _is_fbgemm_gpu_genai_available
+from torchao.utils import (
+    TorchAOBaseTensor,
+    _is_fbgemm_gpu_genai_available,
+    is_sm_at_least_90,
+)
 
 
 def _convert_linear_weight_to_int8_lut_tensor(module):
@@ -187,6 +191,7 @@ def convert_to_packed_tensor_based_on_current_hardware(tensor: TorchAOBaseTensor
         isinstance(tensor, Int4Tensor)
         and is_device("cuda", tensor.device)
         and _is_fbgemm_gpu_genai_available()
+        and is_sm_at_least_90()
     ):
         return Int4PreshuffledTensor.from_int4_tensor(tensor)
     return tensor
diff --git a/torchao/quantization/quantize_/workflows/int4/int4_preshuffled_tensor.py b/torchao/quantization/quantize_/workflows/int4/int4_preshuffled_tensor.py
@@ -5,14 +5,14 @@
 # LICENSE file in the root directory of this source tree.
 
 
-import importlib.util
 from typing import List, Optional
 
 import torch
 
 from torchao.quantization.quantize_.workflows.int4.int4_tensor import Int4Tensor
 from torchao.utils import (
     TorchAOBaseTensor,
+    _is_fbgemm_gpu_genai_available,
 )
 
 __all__ = [
@@ -22,10 +22,7 @@
 aten = torch.ops.aten
 
 
-if (
-    importlib.util.find_spec("fbgemm_gpu") is None
-    or importlib.util.find_spec("fbgemm_gpu.experimental") is None
-):
+if not _is_fbgemm_gpu_genai_available():
     quantize_int4_preshuffle = None
     quantize_fp8_row = None
     pack_int4 = None