
Commit 0f73dd8

issue/170: modified cpu.lua
1 parent 8948293 commit 0f73dd8

3 files changed: 12 additions, 10 deletions

src/infiniop/ops/matmul_gptq/cpu/matmul_gptq_cpu.cc
Lines changed: 8 additions & 5 deletions
@@ -240,13 +240,16 @@ void fasterquant(T *weight, T *Q, T *Err, T *b_scale, T *zero, float *Hess,
             float w = utils::cast<float>(weight[n * K + index * blocksize + i]);
             float err = (w - q) / d;
 
-            for (int j = i; j < blocksize; j++) {
-                if constexpr (std::is_same<T, fp16_t>::value) {
-                    weight[n * K + index * blocksize + j] = utils::cast<fp16_t>(utils::cast<float>(weight[n * K + index * blocksize + j]) - err * Hess[(index * blocksize + i) * K + j]);
-                } else if constexpr (std::is_same<T, float>::value) {
-                    weight[n * K + index * blocksize + j] -= err * Hess[(index * blocksize + i) * K + j];
+            if (group_size == -1) {
+                for (int j = i; j < blocksize; j++) {
+                    if constexpr (std::is_same<T, fp16_t>::value) {
+                        weight[n * K + index * blocksize + j] = utils::cast<fp16_t>(utils::cast<float>(weight[n * K + index * blocksize + j]) - err * Hess[(index * blocksize + i) * K + j]);
+                    } else if constexpr (std::is_same<T, float>::value) {
+                        weight[n * K + index * blocksize + j] -= err * Hess[(index * blocksize + i) * K + j];
+                    }
                 }
             }
+
             if constexpr (std::is_same<T, fp16_t>::value) {
                 Err[n * blocksize + i] = utils::cast<fp16_t>(err);
             } else if constexpr (std::is_same<T, float>::value) {
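The guarded loop is GPTQ's error-feedback step: once column i of a block is quantized, its rounding error err = (w - q) / d is folded, scaled by the corresponding Hessian row, into the columns that have not been quantized yet. The commit gates that in-block propagation on group_size == -1, i.e. per-channel scales; grouped quantization now skips it. Below is a minimal, self-contained sketch of the same step on a plain float row. The quantize helper, the row-major H layout, and the assumption that d equals the diagonal entry H[i][i] (as in standard GPTQ) are illustrative, not taken from this repo:

#include <algorithm>
#include <cmath>
#include <cstdio>

// Hypothetical round-to-grid quantizer: scale s, zero point z, maxq levels.
static float quantize(float w, float s, float z, int maxq) {
    float q = std::round(w / s) + z;
    q = std::min(std::max(q, 0.0f), static_cast<float>(maxq));
    return s * (q - z);
}

// One GPTQ pass over a block of `blocksize` columns of a single row.
// H is blocksize x blocksize, row-major; H[i * blocksize + i] plays the
// role of `d` in the diff. group_size == -1 means per-channel scales,
// the only case where the in-block update still runs after this commit.
void quantize_block(float *w, const float *H, int blocksize,
                    float s, float z, int maxq, int group_size) {
    for (int i = 0; i < blocksize; ++i) {
        float d = H[i * blocksize + i];
        float q = quantize(w[i], s, z, maxq);
        float err = (w[i] - q) / d;
        w[i] = q; // the diff does this implicitly by starting its loop at j = i
        if (group_size == -1) {
            // Per-channel case: fold the error into the remaining columns.
            for (int j = i + 1; j < blocksize; ++j) {
                w[j] -= err * H[i * blocksize + j];
            }
        }
    }
}

int main() {
    float w[4] = {0.31f, -0.12f, 0.58f, -0.44f};
    float H[16] = {0}; // identity Hessian: no cross-column coupling
    for (int i = 0; i < 4; ++i) H[i * 4 + i] = 1.0f;
    quantize_block(w, H, 4, /*s=*/0.1f, /*z=*/8.0f, /*maxq=*/15, /*group_size=*/-1);
    for (int i = 0; i < 4; ++i) std::printf("%f\n", w[i]);
}

Note that the diff's inner loop starts at j = i, so its first update subtracts err * Hess[i][i] = w - q from column i itself, rewriting it to its quantized value; the sketch makes that assignment explicit and starts at j = i + 1.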

test/infiniop/matmul_gptq.py
Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ def test(
     )
     torch.manual_seed(12)
     # Initialize tensors
-    a = 1e-3 * torch.randn([M, K], dtype=dtype).to(torch_device)
+    a = 1e0 * torch.randn([M, K], dtype=dtype).to(torch_device)
     layer = nn.Linear(K, N)
     b = 1e-3 * layer.weight.data.to(dtype).to(torch_device)
     c = torch.zeros([M, N], dtype=dtype).to(torch_device).t()

xmake/cpu.lua
Lines changed: 3 additions & 4 deletions
@@ -1,4 +1,4 @@
-add_requires("lapacke", {public = true})
+add_requires("lapack", {configs = {shared = true}})
 target("infiniop-cpu")
     set_kind("static")
     add_deps("infini-utils")
@@ -13,9 +13,8 @@ target("infiniop-cpu")
         end
     else
         add_cxflags("-fPIC")
-        add_includedirs("/usr/include")
-        add_linkdirs("/usr/lib64")
-        add_links("lapacke", "lapack", "blas")
+        add_packages("lapack")
+        add_links("lapacke", "lapack", "blas", "gfortran")
         if has_config("omp") then
             add_cxflags("-fopenmp")
             add_ldflags("-fopenmp")
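This change replaces the hard-coded /usr/include and /usr/lib64 paths with the xmake lapack package, and adds gfortran to the link line, which the reference (Fortran) LAPACK and BLAS builds need. As a quick sanity check that the new link line resolves, here is a standalone program calling LAPACKE's Cholesky factorization, the kind of routine a GPTQ Hessian pass leans on; whether this repo calls exactly this function is an assumption:

#include <cstdio>
#include <lapacke.h>

int main() {
    // Small symmetric positive-definite matrix, row-major.
    float a[9] = {4.0f, 2.0f, 2.0f,
                  2.0f, 5.0f, 1.0f,
                  2.0f, 1.0f, 6.0f};
    // Factor A = U^T U in place (upper triangle of `a` is overwritten).
    lapack_int info = LAPACKE_spotrf(LAPACK_ROW_MAJOR, 'U', 3, a, 3);
    std::printf("spotrf info = %d (0 means success)\n", (int)info);
    return info == 0 ? 0 : 1;
}

Built outside xmake this would be roughly g++ check.cc -llapacke -llapack -lblas -lgfortran, mirroring the add_links list above.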
