This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit 97c114d

add QuantizeLinearForQbits activation contiguous check (#1072)
Signed-off-by: changwangss <chang1.wang@intel.com>
1 parent: 226e088

File tree

1 file changed: +2 −0 lines

  • intel_extension_for_transformers/llm/quantization/nn/modules.py

intel_extension_for_transformers/llm/quantization/nn/modules.py

Lines changed: 2 additions & 0 deletions

@@ -120,6 +120,8 @@ def forward(self, x: torch.Tensor):
         m = reduce(mul, shape[0:-1])
         out = torch.zeros(m, self.out_features, dtype=x.dtype)
         bias = None if self.bias is None else self.bias.data
+        if not x.is_contiguous():
+            x = x.contiguous()
         out = matmul_kbit(
             x.view(m, shape[-1]), self.weight, bias, out,
             self.compute_dtype, self.weight_dtype, self.scale_dtype, do_dequant=self.training
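
Context for the change: the line immediately after the insertion calls x.view(m, shape[-1]), and torch.Tensor.view raises a RuntimeError when the input's strides cannot be reinterpreted without a copy, which is exactly what happens for non-contiguous activations (e.g. the output of a transpose). Below is a minimal standalone sketch of the failure mode and the guard; the tensor shapes are made up for illustration, and only the two-line guard itself comes from this commit.

import torch

# A made-up activation that is non-contiguous, e.g. the result of a
# transpose: logical shape (3, 2, 4), memory laid out as (2, 3, 4).
x = torch.randn(2, 3, 4).transpose(0, 1)
assert not x.is_contiguous()

# forward() flattens the leading dims via x.view(m, shape[-1]); view()
# cannot merge dimensions whose strides are incompatible and raises:
try:
    x.view(6, 4)  # m = 3 * 2 = 6, shape[-1] = 4
except RuntimeError as err:
    print(err)  # "view size is not compatible with input tensor's size and stride ..."

# The two lines added by this commit: copy only when actually needed.
if not x.is_contiguous():
    x = x.contiguous()  # materializes a contiguous (row-major) copy
out = x.view(6, 4)      # now succeeds

An alternative would be x.reshape(m, shape[-1]), which copies implicitly when required; the explicit is_contiguous() guard keeps that copy visible and skips it entirely on the common contiguous path.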

Comments (0)