From ba1ebc77f8ad832b8204c1333eda603136bea286 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Fri, 7 Nov 2025 12:27:20 +0800
Subject: [PATCH 1/3] update AutoRound layer_config usage

Signed-off-by: Kaihui-intel
---
 docs/source/3x/PT_WeightOnlyQuant.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/docs/source/3x/PT_WeightOnlyQuant.md b/docs/source/3x/PT_WeightOnlyQuant.md
index 2406d8d4150..5c4ca04a7ba 100644
--- a/docs/source/3x/PT_WeightOnlyQuant.md
+++ b/docs/source/3x/PT_WeightOnlyQuant.md
@@ -178,6 +178,8 @@ model = convert(model, config) # after this step, the model is ready for W4A8 i
 | not_use_best_mse (bool) |  Whether to use mean squared error |  False |
 | dynamic_max_gap (int) |  The dynamic maximum gap |  -1 |
 | scale_dtype (str) |  The data type of quantization scale to be used, different kernels have different choices |  "float16" |
+| scheme (str) | A preset scheme that defines the quantization configurations. | "W4A16" |
+| layer_config (dict) | Layer-wise quantization config | None |
 
 ``` python
 # Quantization code
@@ -283,6 +285,23 @@ quant_config = RTNConfig()
 lm_head_config = RTNConfig(dtype="fp32")
 quant_config.set_local("lm_head", lm_head_config)
 ```
+3. Example of using `layer_config` for AutoRound
+```python
+# layer_config = {
+#     "layer1": {
+#         "data_type": "int",
+#         "bits": 3,
+#         "group_size": 128,
+#         "sym": True,
+#     },
+#     # a preset scheme string can also
+#     # be used to configure a layer:
+#     "layer2": "W8A16",
+# }
+layer_config = {"lm_head": {"data_type": "int"}}
+quant_config = AutoRoundConfig(layer_config=layer_config)
+quant_config.set_local("lm_head", lm_head_config)
+```
 
 ### Saving and Loading

From 2d76c7f6654c3c0bb8ba8773c37bf0d1eb9d7e76 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Fri, 7 Nov 2025 12:33:00 +0800
Subject: [PATCH 2/3] align format

Signed-off-by: Kaihui-intel
---
 docs/source/3x/PT_WeightOnlyQuant.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/3x/PT_WeightOnlyQuant.md b/docs/source/3x/PT_WeightOnlyQuant.md
index 5c4ca04a7ba..2c442dec873 100644
--- a/docs/source/3x/PT_WeightOnlyQuant.md
+++ b/docs/source/3x/PT_WeightOnlyQuant.md
@@ -178,8 +178,8 @@ model = convert(model, config) # after this step, the model is ready for W4A8 i
 | not_use_best_mse (bool) |  Whether to use mean squared error |  False |
 | dynamic_max_gap (int) |  The dynamic maximum gap |  -1 |
 | scale_dtype (str) |  The data type of quantization scale to be used, different kernels have different choices |  "float16" |
-| scheme (str) | A preset scheme that defines the quantization configurations. | "W4A16" |
-| layer_config (dict) | Layer-wise quantization config | None |
+| scheme (str) |  A preset scheme that defines the quantization configurations. |  "W4A16" |
+| layer_config (dict) |  Layer-wise quantization config |  None |
 
 ``` python
 # Quantization code

From f9d3ac9cb68a7ba157c154950299fa40a2be8b08 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Fri, 7 Nov 2025 13:46:02 +0800
Subject: [PATCH 3/3] update example

Signed-off-by: Kaihui-intel
---
 docs/source/3x/PT_WeightOnlyQuant.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/3x/PT_WeightOnlyQuant.md b/docs/source/3x/PT_WeightOnlyQuant.md
index 2c442dec873..8f7d1d70ba3 100644
--- a/docs/source/3x/PT_WeightOnlyQuant.md
+++ b/docs/source/3x/PT_WeightOnlyQuant.md
@@ -298,9 +298,9 @@ quant_config.set_local("lm_head", lm_head_config)
 #     # be used to configure a layer:
 #     "layer2": "W8A16",
 # }
+# Use the AutoRound-specific 'layer_config' instead of the 'set_local' API.
 layer_config = {"lm_head": {"data_type": "int"}}
 quant_config = AutoRoundConfig(layer_config=layer_config)
-quant_config.set_local("lm_head", lm_head_config)
 ```
 
 ### Saving and Loading
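
---

For context beyond the diff, here is a minimal end-to-end sketch of how the documented `layer_config` option composes with the `prepare`/`convert` flow already described in PT_WeightOnlyQuant.md. The model name, tokenizer, and calibration routine below are illustrative assumptions, not part of the patch.

```python
# Minimal usage sketch, assuming the neural-compressor 3x PyTorch API.
# The model name and calibration input are placeholders.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from neural_compressor.torch.quantization import AutoRoundConfig, convert, prepare

model_name = "facebook/opt-125m"  # hypothetical example model
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Per-layer override: quantize lm_head weights to int,
# while the global preset scheme covers the remaining layers.
layer_config = {"lm_head": {"data_type": "int"}}
quant_config = AutoRoundConfig(layer_config=layer_config)

# prepare -> run calibration data through the model -> convert.
model = prepare(model, quant_config)
example_inputs = tokenizer("What is weight-only quantization?", return_tensors="pt")
with torch.no_grad():
    model(**example_inputs)
model = convert(model)
```

If the installed auto-round version does not accept a preset scheme string (e.g. `"W8A16"`) as a per-layer value, fall back to an explicit field dict (`data_type`, `bits`, `group_size`, `sym`) as in the commented example inside the patch.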