fix nvfp4 serialization (#3140)

vkuzo · web-flow · commit dd29563c3ebf · 2025-10-10T09:12:39.000-04:00
Update

[ghstack-poisoned]
diff --git a/test/prototype/mx_formats/test_inference_workflow.py b/test/prototype/mx_formats/test_inference_workflow.py
@@ -191,6 +191,16 @@ def test_inference_workflow_nvfp4(
         f"Got a sqnr of {sqnr} for NVFP4 recipe with bias={bias}, mm_config={mm_config}"
     )
 
+    # serialization
+    with tempfile.NamedTemporaryFile() as f:
+        torch.save(m_mx.state_dict(), f)
+        f.seek(0)
+
+        # temporary workaround for https://github.com/pytorch/ao/issues/3077
+        torch.serialization.add_safe_globals([getattr])
+
+        _ = torch.load(f, weights_only=True)
+
 
 class VLLMIntegrationTestCase(TorchAOIntegrationTestCase):
     @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
diff --git a/torchao/prototype/mx_formats/inference_workflow.py b/torchao/prototype/mx_formats/inference_workflow.py
@@ -211,6 +211,7 @@ def _nvfp4_inference_linear_transform(
         NVFP4MMConfig,
         MXGemmKernelChoice,
         QuantizeTensorToMXKwargs,
+        QuantizeTensorToNVFP4Kwargs,
         ScaleCalculationMode,
     ]
 )

Original file line number	Diff line number	Diff line change
`@@ -211,6 +211,7 @@ def _nvfp4_inference_linear_transform(`
`211`	`211`	`NVFP4MMConfig,`
`212`	`212`	`MXGemmKernelChoice,`
`213`	`213`	`QuantizeTensorToMXKwargs,`
	`214`	`+ QuantizeTensorToNVFP4Kwargs,`
`214`	`215`	`ScaleCalculationMode,`
`215`	`216`	`]`
`216`	`217`	`)`