Commit f22ca14

[Tests] Update Schemes (#2013)
SUMMARY:
- Latest compressed-tensors (CT) introduces `scale_dtype`, which, if not provided, falls back to the weight dtype for the scales.
- As a result, we no longer hardcode FP8 as the dtype when generating scales for NVFP4 and instead rely on this field. Update the tests to reflect this.

TESTING:
- Addresses 2 of the 14 failures. All remaining failures come from a single test case, which will be resolved in a follow-up.
1 parent cfe9169 commit f22ca14

File tree

1 file changed (+3, −0 lines)

tests/llmcompressor/modifiers/calibration/test_lifecycle.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -1,6 +1,7 @@
 import pytest
 import torch
 from compressed_tensors.quantization import (
+    FP8_E4M3_DATA,
     QuantizationScheme,
     forward_quantize,
     initialize_module_for_quantization,
@@ -83,6 +84,7 @@
             symmetric=True,
             strategy="tensor_group",  # requires float4
             group_size=3,
+            scale_dtype=FP8_E4M3_DATA.dtype,
         ),
         torch.tensor([[0, 3], [6, 9], [12, 15], [18, 21]]),
         torch.tensor([[2, 5], [8, 11], [14, 17], [20, 23]]),
@@ -195,6 +197,7 @@ def test_static_weight_quantization(
             strategy="tensor_group",
             dynamic="local",
             group_size=3,
+            scale_dtype=FP8_E4M3_DATA.dtype,
         ),
         None,
         None,
```
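For context, a minimal sketch of the pattern this commit relies on: pinning the scale dtype explicitly instead of depending on CT's new fallback to the weight dtype. It assumes the `QuantizationArgs`/`QuantizationScheme` constructors from `compressed-tensors`; the `num_bits=4`, `type="float"`, and `targets=["Linear"]` values are illustrative and not taken from this diff.

```python
from compressed_tensors.quantization import (
    FP8_E4M3_DATA,
    QuantizationArgs,
    QuantizationScheme,
)

# NVFP4-style weight args: float4 values quantized per tensor group.
# If scale_dtype is omitted, recent compressed-tensors falls back to
# the weight dtype for the scales; passing FP8_E4M3_DATA.dtype keeps
# the FP8 scales that were previously hardcoded.
weight_args = QuantizationArgs(
    num_bits=4,                        # illustrative: float4 weights
    type="float",
    symmetric=True,
    strategy="tensor_group",
    group_size=3,                      # matches the group size in these tests
    scale_dtype=FP8_E4M3_DATA.dtype,   # the field this commit adds to the tests
)

scheme = QuantizationScheme(targets=["Linear"], weights=weight_args)
```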
