Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit c423deb

Browse files
authored
Check tensors and convert them to contiguous format when saving a model (#1414)
1 parent a1cc64b commit c423deb

File tree

2 files changed

+37
-2
lines changed

2 files changed

+37
-2
lines changed

examples/huggingface/pytorch/code-generation/quantization/run_generation.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,28 @@
6363
parser.add_argument("--mixed_precision", action="store_true")
6464
# ============SmoothQuant configs==============
6565
parser.add_argument("--sq", action="store_true")
66+
parser.add_argument("--calib_iters", default=100, type=int, help="Calibration iters.")
67+
parser.add_argument(
68+
"--calib_padding", action="store_true", help="Calibration dataset do padding."
69+
)
70+
parser.add_argument(
71+
"--calib_shuffle",
72+
default=True,
73+
type=str2bool,
74+
help="Calibration dataset do shuffle.",
75+
)
76+
parser.add_argument(
77+
"--calib_pad_val", default=1, type=int, help="Calibration dataset padding value."
78+
)
79+
parser.add_argument(
80+
"--calib_len",
81+
default=512,
82+
type=int,
83+
help="Calibration dataset max or padding max length.",
84+
)
85+
parser.add_argument(
86+
"--recipes", type=str, help="A dictionary as a string, recipes for smoothquant."
87+
)
6688
parser.add_argument("--alpha", default="0.5", help="Smooth quant parameter.")
6789
# ============BitsAndBytes configs==============
6890
parser.add_argument("--bitsandbytes", action="store_true")
@@ -109,6 +131,8 @@
109131
)
110132
parser.add_argument("--group_size", type=int, default=32)
111133
parser.add_argument("--scheme", default="sym")
134+
parser.add_argument("--load_in_4bit", action="store_true")
135+
parser.add_argument("--load_in_8bit", action="store_true")
112136
parser.add_argument(
113137
"--layer_wise",
114138
action="store_true",

intel_extension_for_transformers/transformers/modeling/modeling_auto.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,12 @@ def recover_export_model(model, current_key_name=None):
124124
use_optimum_format=True,
125125
)
126126

127-
model._modules[name].pack(int_weight, scales, zeros, module.bias, g_idx=g_idx)
127+
# Setting g_idx is invalid when use_optimum_format is True, so set it again when g_idx is not None.
128+
# https://github.com/intel/neural-compressor/blob/v2.5.dev2/neural_compressor/adaptor/torch_utils/
129+
# model_wrapper.py#L343
130+
model._modules[name].pack(
131+
int_weight, scales, zeros, module.bias, g_idx=g_idx
132+
)
128133
if g_idx is not None:
129134
model._modules[name].g_idx = g_idx
130135

@@ -189,6 +194,12 @@ def convert_model_to_public(model):
189194
model = recover_export_model(model)
190195

191196

197+
def make_contiguous(model):
    """Make every parameter tensor of *model* contiguous in memory, in place.

    Called just before ``save_pretrained`` so that serialization does not
    trip over non-contiguous tensors (views, transposes, strided slices) —
    presumably required by the save backend; confirm against the saver used.

    ``Tensor.contiguous()`` returns the tensor itself when it is already
    contiguous, so this pass is cheap in the common case.

    Args:
        model: a ``torch.nn.Module`` whose parameters are normalized in place.

    Returns:
        None.
    """
    for param in model.parameters():
        # No ndimension() restriction: 1-D tensors can also be
        # non-contiguous (e.g. a strided slice), and contiguous() is a
        # no-op for tensors that already are contiguous.
        param.data = param.data.contiguous()
192203
def save_low_bit(
193204
self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs
194205
):
@@ -207,6 +218,7 @@ def save_low_bit(
207218
os.makedirs(save_directory, exist_ok=True)
208219
# use transformers original `save_pretrained` function
209220
del self.save_pretrained
221+
make_contiguous(self)
210222
self.save_pretrained(
211223
save_directory=save_directory, push_to_hub=push_to_hub, **kwargs
212224
)
@@ -416,7 +428,6 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
416428

417429
load_in_8bit = kwargs.pop("load_in_8bit", False)
418430
load_in_4bit = kwargs.pop("load_in_4bit", False)
419-
420431
if isinstance(quantization_config, BitsAndBytesConfig):
421432
model = cls.ORIG_MODEL.from_pretrained(
422433
pretrained_model_name_or_path,

0 commit comments

Comments
 (0)