Add CreateQuantPybindModule into orttraining (microsoft#26401)

jiafatom · web-flow · commit 697283dd8f5e · 2025-10-24T18:11:02.000-07:00
### Description
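As titled: call `CreateQuantPybindModule(m)` from the `PYBIND11_MODULE(onnxruntime_pybind11_state, m)` definition in `orttraining/orttraining/python/orttraining_python_module.cc` (with a forward declaration added just above it). In addition, remove the `unittest.skipIf(find_spec("onnxruntime.training"), ...)` guards from the quantization tests and from `test_generation.py`, drop the `find_spec` imports that become unused, and import `run_whisper` unconditionally in `test_generation.py`.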



### Motivation and Context
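As titled. The removed skip messages state that the training package did not provide `quantize_matmul_2bits`, `quantize_matmul_4bits`, or `quantize_matmul_bnb4`, so the affected tests were skipped whenever `onnxruntime.training` was importable. Registering the quantization bindings in the training build's `onnxruntime_pybind11_state` module is intended to make those functions available there, so the tests no longer need to be skipped.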
diff --git a/onnxruntime/test/python/quantization/test_op_matmul_2bits.py b/onnxruntime/test/python/quantization/test_op_matmul_2bits.py
@@ -7,7 +7,6 @@
 
 import tempfile
 import unittest
-from importlib.util import find_spec
 from pathlib import Path
 
 import numpy as np
@@ -205,9 +204,6 @@ def quant_test(
             else:
                 raise exception
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_2bits"
-    )
     def test_quantize_matmul_int2_symmetric(self):
         np.random.seed(13)
 
@@ -216,9 +212,6 @@ def test_quantize_matmul_int2_symmetric(self):
         data_reader = self.input_feeds(1, {"input": (100, 52)})
         self.quant_test(model_fp32_path, data_reader, 32, True, rtol=0.02, atol=0.1)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_2bits"
-    )
     def test_quantize_matmul_int2_offsets(self):
         model_fp32_path = str(Path(self._tmp_model_dir.name).joinpath("matmul_fp32_offset.onnx").absolute())
         self.construct_model_matmul(model_fp32_path, symmetric=False)
diff --git a/onnxruntime/test/python/quantization/test_op_matmul_4bits.py b/onnxruntime/test/python/quantization/test_op_matmul_4bits.py
@@ -295,9 +295,6 @@ def quant_test_with_algo(
             else:
                 raise exception
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
-    )
     def test_quantize_matmul_int4_symmetric(self):
         np.random.seed(13)
 
@@ -306,18 +303,12 @@ def test_quantize_matmul_int4_symmetric(self):
         data_reader = self.input_feeds(1, {"input": (100, 52)})
         self.quant_test(model_fp32_path, data_reader, 32, True)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
-    )
     def test_quantize_matmul_int4_offsets(self):
         model_fp32_path = str(Path(self._tmp_model_dir.name).joinpath("matmul_fp32_offset.onnx").absolute())
         self.construct_model_matmul(model_fp32_path, symmetric=False)
         data_reader = self.input_feeds(1, {"input": (100, 52)})
         self.quant_test(model_fp32_path, data_reader, 32, False)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
-    )
     def test_quantize_gather_int4_symmetric(self):
         np.random.seed(13)
 
@@ -327,19 +318,13 @@ def test_quantize_gather_int4_symmetric(self):
         # cover rounding error
         self.quant_test(model_fp32_path, data_reader, 32, True, op_types_to_quantize=("Gather",), rtol=0.2, atol=0.5)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
-    )
     def test_quantize_gather_int4_offsets(self):
         model_fp32_path = str(Path(self._tmp_model_dir.name).joinpath("gather_fp32_offset.onnx").absolute())
         self.construct_model_gather(model_fp32_path, False, TensorProto.FLOAT16, TensorProto.INT64)
         data_reader = self.input_feeds(1, {"input": (100, 1000)}, -545, 535, np.int64)
         # cover rounding error
         self.quant_test(model_fp32_path, data_reader, 32, False, op_types_to_quantize=("Gather",), rtol=0.2, atol=0.5)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
-    )
     def test_quantize_matmul_int4_symmetric_qdq(self):
         np.random.seed(13)
 
@@ -348,18 +333,12 @@ def test_quantize_matmul_int4_symmetric_qdq(self):
         data_reader = self.input_feeds(1, {"input": (100, 52)})
         self.quant_test(model_fp32_path, data_reader, 32, True, quant_utils.QuantFormat.QDQ)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
-    )
     def test_quantize_matmul_int4_offsets_qdq(self):
         model_fp32_path = str(Path(self._tmp_model_dir.name).joinpath("matmul_fp32_offset.onnx").absolute())
         self.construct_model_matmul(model_fp32_path, symmetric=False)
         data_reader = self.input_feeds(1, {"input": (100, 52)})
         self.quant_test(model_fp32_path, data_reader, 32, False, quant_utils.QuantFormat.QDQ)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
-    )
     def test_quantize_matmul_int4_using_rtn_algo(self):
         if not find_spec("neural_compressor"):
             self.skipTest("skip test_smooth_quant since neural_compressor is not installed")
@@ -370,9 +349,6 @@ def test_quantize_matmul_int4_using_rtn_algo(self):
         data_reader = self.input_feeds(1, {"input": (100, 52)})
         self.quant_test_with_algo("RTN", model_fp32_path, data_reader, 32, False)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
-    )
     def test_quantize_matmul_int4_using_gptq_algo(self):
         if not find_spec("neural_compressor"):
             self.skipTest("skip test_smooth_quant since neural_compressor is not installed")
@@ -383,9 +359,6 @@ def test_quantize_matmul_int4_using_gptq_algo(self):
         data_reader = self.input_feeds(1, {"input": (100, 52)})
         self.quant_test_with_algo("GPTQ", model_fp32_path, data_reader, 32, False)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
-    )
     def test_quantize_matmul_int4_using_hqq_algo(self):
         if not find_spec("torch"):
             self.skipTest("skip test_hqq_quant since torch is not installed")
diff --git a/onnxruntime/test/python/quantization/test_op_matmul_bnb4.py b/onnxruntime/test/python/quantization/test_op_matmul_bnb4.py
@@ -7,7 +7,6 @@
 
 import tempfile
 import unittest
-from importlib.util import find_spec
 from pathlib import Path
 
 import numpy as np
@@ -166,16 +165,10 @@ def quant_test(self, quant_type: int, block_size: int):
         except Exception as exception:
             raise exception
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_bnb4"
-    )
     def test_quantize_matmul_bnb4_fp4(self):
         np.random.seed(13)
         self.quant_test(0, 64)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_bnb4"
-    )
     def test_quantize_matmul_bnb4_nf4(self):
         np.random.seed(13)
         self.quant_test(1, 64)
diff --git a/onnxruntime/test/python/quantization/test_quantizeblockwise_4bits.py b/onnxruntime/test/python/quantization/test_quantizeblockwise_4bits.py
@@ -6,7 +6,6 @@
 # --------------------------------------------------------------------------
 
 import unittest
-from importlib.util import find_spec
 
 import numpy as np
 import numpy.typing as npt
@@ -99,9 +98,6 @@ def quantize_blockwise_4bits_target(matrix_float: npt.ArrayLike, block_size: int
 
 
 class TestQuantizeBlockwise4Bits(unittest.TestCase):
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_4bits"
-    )
     def test_quantize_blockwise_4bits(self):
         for rows, cols in [(128, 128), (32, 128), (128, 32), (52, 128), (128, 52), (73, 123)]:
             for block_size in [16, 32, 64, 128]:
diff --git a/onnxruntime/test/python/quantization/test_quantizeblockwise_bnb4.py b/onnxruntime/test/python/quantization/test_quantizeblockwise_bnb4.py
@@ -6,7 +6,6 @@
 # --------------------------------------------------------------------------
 
 import unittest
-from importlib.util import find_spec
 
 import numpy as np
 import numpy.typing as npt
@@ -120,9 +119,6 @@ def quantize_blockwise_bnb4_target(matrix_float: npt.ArrayLike, block_size: int,
 
 
 class TestQuantizeBlockwiseBnb4(unittest.TestCase):
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_bnb4"
-    )
     def test_quantize_blockwise_bnb4(self):
         for quant_type in ["FP4", "NF4"]:
             for k, n in [(128, 128), (32, 128), (128, 32), (52, 128), (128, 52), (73, 123)]:
diff --git a/onnxruntime/test/python/transformers/test_generation.py b/onnxruntime/test/python/transformers/test_generation.py
@@ -8,7 +8,6 @@
 import os
 import shutil
 import unittest
-from importlib.util import find_spec
 
 import onnx
 import pytest
@@ -21,16 +20,12 @@
     from benchmark_helper import Precision
     from convert_generation import main as run
     from models.t5.convert_to_onnx import export_onnx_models as export_t5_onnx_models
-
-    if not find_spec("onnxruntime.training"):
-        from models.whisper.convert_to_onnx import main as run_whisper
+    from models.whisper.convert_to_onnx import main as run_whisper
 else:
     from onnxruntime.transformers.benchmark_helper import Precision
     from onnxruntime.transformers.convert_generation import main as run
     from onnxruntime.transformers.models.t5.convert_to_onnx import export_onnx_models as export_t5_onnx_models
-
-    if not find_spec("onnxruntime.training"):
-        from onnxruntime.transformers.models.whisper.convert_to_onnx import main as run_whisper
+    from onnxruntime.transformers.models.whisper.convert_to_onnx import main as run_whisper
 
 
 def has_cuda_environment():
@@ -514,33 +509,21 @@ def run_configs(self, optional_arguments):
         if "--model_impl" not in arguments:
             self.run_export(arguments)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_2bits"
-    )
     @pytest.mark.slow
     def test_required_args(self):
         optional_args = []
         self.run_configs(optional_args)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_2bits"
-    )
     @pytest.mark.slow
     def test_forced_decoder_ids(self):
         decoder_input_ids = ["--use_forced_decoder_ids"]
         self.run_configs(decoder_input_ids)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_2bits"
-    )
     @pytest.mark.slow
     def test_logits_processor(self):
         logits_processor = ["--use_logits_processor"]
         self.run_configs(logits_processor)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_2bits"
-    )
     @pytest.mark.slow
     def test_cross_qk_overall(self):
         cross_qk_input_args = [
@@ -557,9 +540,6 @@ def test_cross_qk_overall(self):
         ]
         self.run_configs(cross_qk_input_args + cross_qk_output_args)
 
-    @unittest.skipIf(
-        find_spec("onnxruntime.training"), "Skip because training package doesn't has quantize_matmul_2bits"
-    )
     @pytest.mark.slow
     def test_openai_impl_whisper(self):
         optional_args = ["--model_impl", "openai"]
diff --git a/orttraining/orttraining/python/orttraining_python_module.cc b/orttraining/orttraining/python/orttraining_python_module.cc
@@ -300,6 +300,8 @@ Status CreateTrainingPybindStateModule(py::module& m) {
   return Status::OK();
 }
 
+void CreateQuantPybindModule(py::module& m);
+
 PYBIND11_MODULE(onnxruntime_pybind11_state, m) {
   auto st = CreateTrainingPybindStateModule(m);
   if (!st.IsOK())
@@ -332,6 +334,7 @@ PYBIND11_MODULE(onnxruntime_pybind11_state, m) {
       "Clean the execution provider instances used in ort training module.");
 
   m.def("has_collective_ops", []() -> bool { return HAS_COLLECTIVE_OPS; });
+  CreateQuantPybindModule(m);
 }
 
 }  // namespace python

Original file line number	Diff line number	Diff line change
`@@ -300,6 +300,8 @@ Status CreateTrainingPybindStateModule(py::module& m) {`
`300`	`300`	`return Status::OK();`
`301`	`301`	`}`
`302`	`302`
	`303`	`+void CreateQuantPybindModule(py::module& m);`
	`304`	`+`
`303`	`305`	`PYBIND11_MODULE(onnxruntime_pybind11_state, m) {`
`304`	`306`	`auto st = CreateTrainingPybindStateModule(m);`
`305`	`307`	`if (!st.IsOK())`
`@@ -332,6 +334,7 @@ PYBIND11_MODULE(onnxruntime_pybind11_state, m) {`
`332`	`334`	`"Clean the execution provider instances used in ort training module.");`
`333`	`335`
`334`	`336`	`m.def("has_collective_ops", []() -> bool { return HAS_COLLECTIVE_OPS; });`
	`337`	`+ CreateQuantPybindModule(m);`
`335`	`338`	`}`
`336`	`339`
`337`	`340`	`} // namespace python`