Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit f978bcf

Browse files
qbits support torch version compatibility check (#1607)
* qbits support torch version compatibility check * [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 6042826 commit f978bcf

File tree

5 files changed

+35
-24
lines changed

5 files changed

+35
-24
lines changed

docs/qbits.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,8 @@ If user wants to use QBits, the Pytorch version must meet ITREX requirements, he
7474
| v1.4 | 2.2.0+cpu |
7575
| v1.4.1 | 2.2.0+cpu |
7676
| v1.4.2 | 2.3.0+cpu |
77+
78+
Users can also check whether the current torch version is compatible with QBits by using the `check_torch_compatibility` function provided by QBits.
79+
```python
80+
assert qbits.check_torch_compatibility(str(torch.__version__))
81+
```

intel_extension_for_transformers/qbits/CMakeLists.txt

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,27 @@ project(qbits_py LANGUAGES C CXX)
1616

1717

1818
set(QBITS_TORCH_PATH "" CACHE STRING "Torch install path")
19+
set(torch_info "")
20+
21+
function(get_torch_info python_command)
22+
set(import_torch "import torch:")
23+
string(REPLACE ":" ";" import_torch ${import_torch})
24+
string(CONCAT fin_command "${import_torch}" "${python_command}")
25+
execute_process(COMMAND python -c "${fin_command}"
26+
OUTPUT_VARIABLE torch_info
27+
OUTPUT_STRIP_TRAILING_WHITESPACE)
28+
set(torch_info "${torch_info}" PARENT_SCOPE)
29+
endfunction()
30+
1931

2032
if(QBITS_TORCH_PATH)
2133
set(torch_path ${QBITS_TORCH_PATH})
2234
unset(TORCH_LIBRARY CACHE) # force find_package torch
2335
unset(c10_LIBRARY CACHE)
2436
unset(TORCH_DIR CACHE)
2537
else()
26-
execute_process(COMMAND python -c "import torch; print(torch.__path__[0])"
27-
OUTPUT_VARIABLE torch_path
28-
OUTPUT_STRIP_TRAILING_WHITESPACE)
38+
get_torch_info("print(torch.__path__[0])")
39+
set(torch_path "${torch_info}")
2940
endif()
3041

3142
find_package(Torch REQUIRED
@@ -48,6 +59,10 @@ add_compile_options(-flto=auto)
4859

4960
# Link against LibTorch
5061
pybind11_add_module(qbits_py ${qbits_src})
62+
get_torch_info("print(torch.__version__)")
63+
set(torch_version "${torch_info}")
5164
target_compile_features(qbits_py PRIVATE cxx_std_14)
65+
set(TORCH_VERSION_MACRO COMPATIBLE_TORCH_VERSION="${torch_version}")
66+
target_compile_definitions(qbits_py PUBLIC ${TORCH_VERSION_MACRO})
5267
target_link_directories(qbits_py PRIVATE ${torch_path}/lib)
5368
target_link_libraries(qbits_py PRIVATE bestla_dispatcher torch_python)

intel_extension_for_transformers/qbits/qbits.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,6 @@ static void woq_linear(const torch::Tensor& activation, const torch::Tensor& wei
114114
torch::Tensor& output, const std::string& compute_type, const std::string& weight_type,
115115
const std::string& scale_type, bool asym) {
116116
woq::woq_config_param p;
117-
118117
torch::Tensor bias_fp32;
119118
torch::Tensor* rt_bias = bias.numel() == 0 ? &output : const_cast<torch::Tensor*>(&bias);
120119
if (bias.scalar_type() != torch::kFloat32 && bias.numel() != 0) {
@@ -180,6 +179,16 @@ static bool check_isa_supported(std::string isa) {
180179
return false;
181180
}
182181

182+
static bool check_torch_compatibility(std::string version) {
183+
static std::string expected_version = COMPATIBLE_TORCH_VERSION;
184+
if (version == expected_version) {
185+
return true;
186+
}
187+
TORCH_CHECK(false,
188+
"QBits: Detected non QBits compiled version Torch, expected" + expected_version + ", but got " + version);
189+
return false;
190+
}
191+
183192
PYBIND11_MODULE(qbits_py, m) {
184193
m.def("quantize_to_packed_weight", &quantize_to_packed_weight);
185194
m.def("woq_linear", &woq_linear);
@@ -193,4 +202,5 @@ PYBIND11_MODULE(qbits_py, m) {
193202
m.def("dropout_fwd", &qbits_dropout_fwd);
194203
m.def("dropout_bwd", &qbits_dropout_bwd);
195204
m.def("check_isa_supported", &check_isa_supported);
205+
m.def("check_torch_compatibility", &check_torch_compatibility);
196206
}

intel_extension_for_transformers/qbits/qbits_ut/test_weightonly.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
@pytest.mark.parametrize("src_dt", ["fp32", "bf16"])
4242
@pytest.mark.parametrize("dst_dt", ["fp32", "bf16"])
4343
def test(m, n, k, blocksize, compute_type, weight_type, scale_type, asym, transpose, add_bias, src_dt, dst_dt, dump_tensor_info=True):
44+
assert qbits.check_torch_compatibility(str(torch.__version__))
4445
if compute_type == "int8" and weight_type == "int8" and (not qbits.check_isa_supported("AVX_VNNI")):
4546
pytest.skip()
4647
if compute_type not in cmpt_configs[weight_type] or scale_type not in scale_configs[weight_type]:

intel_extension_for_transformers/qbits/run_build.sh

Lines changed: 0 additions & 20 deletions
This file was deleted.

0 commit comments

Comments
 (0)