Skip to content

Commit 39ce823

Browse files
Fix WOQ int8 failures (#884)
* Fix WOQ int8 failures
* update readme
* add dynamo reset for freezing case
* re-trigger UT
* re-trigger CI
* trigger UT
1 parent d9abbf6 commit 39ce823

File tree

2 files changed

+3
-11
lines changed

2 files changed

+3
-11
lines changed

test/integration/test_integration.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -826,9 +826,6 @@ def test_int8_dynamic_quant_subclass_api(self, device, dtype):
826826
@parameterized.expand(COMMON_DEVICE_DTYPE)
827827
@unittest.skipIf(is_fbcode(), "broken in fbcode")
828828
def test_int8_weight_only_quant_subclass_api(self, device, dtype):
829-
if TORCH_VERSION_AT_LEAST_2_5 and device == "cpu":
830-
self.skipTest("Regression introduced in PT nightlies")
831-
832829
undo_recommended_configs()
833830
self._test_lin_weight_subclass_api_impl(
834831
_int8wo_api, device, 40, test_dtype=dtype
@@ -838,9 +835,7 @@ def test_int8_weight_only_quant_subclass_api(self, device, dtype):
838835
@torch._inductor.config.patch({"freezing": True})
839836
@unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "freeze requires torch 2.4 and after.")
840837
def test_int8_weight_only_quant_with_freeze(self, device, dtype):
841-
if TORCH_VERSION_AT_LEAST_2_5 and device == "cpu":
842-
self.skipTest("Regression introduced in PT nightlies")
843-
838+
torch._dynamo.reset()
844839
self._test_lin_weight_subclass_api_impl(
845840
_int8wo_api, device, 40, test_dtype=dtype
846841
)
@@ -1054,10 +1049,7 @@ def test_save_load_dqtensors(self, device, dtype):
10541049
@parameterized.expand(COMMON_DEVICE_DTYPE)
10551050
@torch.no_grad()
10561051
@unittest.skipIf(is_fbcode(), "broken in fbcode")
1057-
def test_save_load_int8woqtensors(self, device, dtype):
1058-
if TORCH_VERSION_AT_LEAST_2_5 and device == "cpu":
1059-
self.skipTest(f"Regression introduced in PT nightlies")
1060-
1052+
def test_save_load_int8woqtensors(self, device, dtype):
10611053
undo_recommended_configs()
10621054
self._test_handle_save_load_meta_impl(_int8wo_api, device, test_dtype=dtype)
10631055

torchao/quantization/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ but if you use 2.4 or before, you'll need to use `unwrap_tensor_subclass` as wel
246246

247247
Note that the workaround will not be needed after https://github.com/pytorch/pytorch/issues/129682 is fixed.
248248

249-
249+
Note that the workaround is also required for `torch.compile` with `freezing` (`torch._inductor.config.freezing=True`) until https://github.com/pytorch/pytorch/pull/136265 is fixed.
250250

251251
## Other Available Quantization Techniques
252252

0 commit comments

Comments (0)