
Commit 8a4be4f

Fix GPTQ NF4&FP4 quant (#2314)
Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
1 parent 588e7fc commit 8a4be4f

File tree

1 file changed (+2, −0 lines)
  • neural_compressor/torch/algorithms/weight_only


neural_compressor/torch/algorithms/weight_only/utility.py

Lines changed: 2 additions & 0 deletions
@@ -516,6 +516,7 @@ def quant_weight_w_scale(weight, scale, scale_bf16_to_fp8, zp=None, group_size=-
         if dtype in FLOAT_MAPPING.keys():  # NF4 FP4
             int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size]
             quantize_4bit(int_weight_tmp, scale=scale[:, i].unsqueeze(1), dtype=dtype, return_int=True)[0]
+            int_weight[:, i * group_size : (i + 1) * group_size].copy_(int_weight_tmp)
         else:
             int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size].div_(scale[:, i].unsqueeze(1))
             if zp is not None:
@@ -526,6 +527,7 @@ def quant_weight_w_scale(weight, scale, scale_bf16_to_fp8, zp=None, group_size=-
         if dtype in FLOAT_MAPPING.keys():  # NF4 FP4
             int_weight_tmp = weight[:, leng * group_size :]
             quantize_4bit(int_weight_tmp, scale=scale[:, -1].unsqueeze(1), dtype=dtype, return_int=True)[0]
+            int_weight[:, leng * group_size :].copy_(int_weight_tmp)
         else:
             int_weight_tmp = weight[:, leng * group_size :].div_(scale[:, -1].unsqueeze(1))
             if zp is not None:
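The change is small but behavioral: before this commit, the NF4/FP4 branch produced the quantized group via quantize_4bit but never wrote the result back into the preallocated int_weight buffer, so that buffer kept its initial values. The two added copy_ calls store each quantized group slice (and the final remainder slice) into int_weight. Below is a minimal sketch of this group-wise quantize-and-copy-back pattern; toy_quantize_4bit is a hypothetical round-and-clamp stand-in, not the quantize_4bit helper used by Neural Compressor, and the tensor shapes are illustrative only.

import torch

def toy_quantize_4bit(t, scale):
    # Hypothetical stand-in for the library's quantize_4bit helper:
    # scale, round, and clamp each value to a signed 4-bit integer range.
    return torch.clamp(torch.round(t / scale), -8, 7)

group_size = 32
weight = torch.randn(4, 128)           # [out_features, in_features]
int_weight = torch.zeros_like(weight)  # preallocated destination buffer
# One scale per (row, group), shaped [out_features, num_groups].
scale = weight.abs().reshape(4, -1, group_size).amax(dim=-1) / 7.0

for i in range(weight.shape[1] // group_size):
    group = weight[:, i * group_size : (i + 1) * group_size]
    int_weight_tmp = toy_quantize_4bit(group, scale[:, i].unsqueeze(1))
    # The copy back into the destination buffer is the step the commit adds;
    # without it this branch leaves int_weight untouched.
    int_weight[:, i * group_size : (i + 1) * group_size].copy_(int_weight_tmp)

Writing into int_weight via an in-place copy_ on the slice keeps the preallocated buffer (and its dtype/device) intact, which matches how the non-NF4/FP4 branch already fills int_weight group by group.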
