@@ -40,8 +40,9 @@ pip install -v .
4040# install requirements
4141cd examples/huggingface/pytorch/text-generation/quantization
4242pip install -r requirements.txt
43- pip install neural-compressor==2.5
44- pip install transformers==4.35.2
43+ pip install neural-compressor==2.6
44+ pip install transformers==4.38.1
45+
4546pip install torch==2.3.0+cpu --index-url https://download.pytorch.org/whl/cpu
4647pip install intel-extension-for-pytorch==2.3.0
4748```
@@ -221,10 +222,11 @@ python run_generation_sq.py \
221222 --calib_len 2048 \
222223 --fallback_add \
223224 --calib_shuffle False \
225+ --calib_iters 512 \
224226 --tasks lambada_openai \
225227 --int8 --sq --accuracy \
226228 --batch_size 1 \
227- --recipes " {'smooth_quant': True, 'smooth_quant_args': {'alpha': 'auto', 'folding': False, 'default_alpha': 0.8, 'auto_alpha_args': {'alpha_min': 0.8 , 'alpha_max': 0.99, 'alpha_step': 0.01, 'shared_criterion': 'mean'}}}"
229+ --recipes " {'smooth_quant': True, 'smooth_quant_args': {'alpha': 'auto', 'folding': False, 'default_alpha': 0.8, 'auto_alpha_args': {'alpha_min': 0.79 , 'alpha_max': 0.99, 'alpha_step': 0.01, 'shared_criterion': 'mean'}}}"
228230```
229231
230232### Weight-Only Quantization
@@ -276,11 +278,12 @@ python run_generation_sq.py \
276278 --trust_remote_code \
277279 --calib_len 1024 \
278280 --fallback_add \
281+	 --calib_iters 512 \
279282 --calib_padding \
280283 --tasks lambada_openai \
281284 --int8 --sq --accuracy \
282285 --batch_size 1 \
283- --recipes " {'smooth_quant': True, 'smooth_quant_args': {'alpha': 'auto', 'folding': False, 'default_alpha': 0.8, 'auto_alpha_args': {'alpha_min': 0.75, 'alpha_max': 0.99, 'alpha_step': 0.01, 'shared_criterion': 'max'}}}"
286+ --recipes " {'smooth_quant': True, 'smooth_quant_args': {'alpha': 'auto', 'folding': False, 'default_alpha': 0.8, 'auto_alpha_args': {'alpha_min': 0.75, 'alpha_max': 0.99, 'alpha_step': 0.01, 'shared_criterion': 'max', 'n_samples':64 }}}"
284287```
285288
286289### Weight-Only Quantization
@@ -544,7 +547,7 @@ python run_generation_sq.py \
544547 --tasks lambada_openai \
545548 --int8 --sq --accuracy \
546549 --batch_size 1 \
547- --alpha 0.65
550+ --alpha 1.0
548551```
549552
550553### Weight-Only Quantization
@@ -650,8 +653,10 @@ python run_generation_sq.py \
650653 --trust_remote_code \
651654 --tasks lambada_openai \
652655 --int8 --sq --accuracy \
656+	 --calib_iters 512 \
653657 --batch_size 1 \
654- --alpha 0.75
658+ --recipes " {'smooth_quant':True,'smooth_quant_args':{'alpha':'auto','folding':False,'default_alpha':0.7,'auto_alpha_args':{'alpha_min':0.55,'alpha_max':0.8,'alpha_step':0.01,'shared_criterion':'mean','n_samples':64}}}" \
659+ --calib_iters 512
655660```
656661
657662### Weight-Only Quantization
@@ -702,8 +707,8 @@ python run_generation_sq.py \
702707 --trust_remote_code \
703708 --tasks lambada_openai \
704709 --int8 --sq --accuracy \
705- --batch_size 1 \
706- --alpha 0.9
710+ --recipes " {'smooth_quant':True,'smooth_quant_args':{'alpha':'auto','folding':False,'default_alpha':0.85,'auto_alpha_args':{'alpha_min':0.79,'alpha_max':0.88,'alpha_step':0.01,'shared_criterion':'mean'}}} " \
711+ --batch_size 1
707712```
708713
709714### Weight-Only Quantization
0 commit comments