
Commit 63056ec

Improve MPT series SQ (#1640)

Signed-off-by: Wang, Chang <chang1.wang@intel.com>

1 parent a9a0e93
File tree

1 file changed: +7 -0 lines changed

intel_extension_for_transformers/transformers/modeling/modeling_auto.py
Lines changed: 7 additions & 0 deletions
@@ -840,6 +840,12 @@ def forward(self, input: torch.Tensor) -> tuple[torch.Tensor, None]:
             or device_map == torch.device("cpu")
         ) and model.config.model_type == "chatglm":
             model = model.float()
+        if (
+            not torch.cuda.is_available()
+            or device_map == "cpu"
+            or device_map == torch.device("cpu")
+        ) and model.config.model_type == "mpt":
+            model.config.architectures = ["MptForCausalLM"]
         model.eval()
         model_type = model.config.model_type.replace("_", "-")

@@ -1077,6 +1083,7 @@ def calib_func(model):
                 recipes=quantization_config.recipes,
                 example_inputs=example_inputs,
             )
+
             model = quantization.fit(
                 model,
                 conf,
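
The substantive change is the first hunk: on a CPU-only placement, the MPT config's architectures field is pinned to transformers' native "MptForCausalLM". Below is a minimal, self-contained sketch of that gate, using transformers' MptConfig purely to obtain a config whose model_type is "mpt"; the device_map value is an assumed example, not taken from the commit.

```python
import torch
from transformers import MptConfig

# Assumed caller-supplied placement; could also be torch.device("cpu").
device_map = "cpu"

# MptConfig is used here only to get a config with model_type == "mpt"
# without downloading any model weights.
config = MptConfig()

if (
    not torch.cuda.is_available()
    or device_map == "cpu"
    or device_map == torch.device("cpu")
) and config.model_type == "mpt":
    # Pin the architectures field to the native transformers MPT class so
    # downstream smooth-quantization code resolves a consistent class name.
    config.architectures = ["MptForCausalLM"]

print(config.architectures)  # ['MptForCausalLM']
```

The likely motivation, inferred from the diff rather than stated in it, is that MPT checkpoints loaded via remote code can report a custom architecture name, and normalizing it keeps the CPU smooth-quantization path consistent.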
