1 file changed, +11 -4 lines changed
@@ -6,7 +6,6 @@
 from llmcompressor.modifiers.quantization import QuantizationModifier
 from llmcompressor.utils import dispatch_for_generation
 
-import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
@@ -17,9 +16,7 @@ def run(
     assert quant_type in ("fp8", "nvfp4"), "unsupported"
 
     # Load model.
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name, torch_dtype=torch.bfloat16
-    )
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")
     print(model)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
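The hunks above drop the hard-coded torch.bfloat16 (and the now-unused torch import) in favor of torch_dtype="auto", which loads weights in whatever dtype the checkpoint's config.json records. A quick way to check what "auto" will resolve to, as a minimal sketch (the model id is a stand-in, and torch_dtype may be None for configs that do not record one):

    from transformers import AutoConfig

    # torch_dtype="auto" defers to the dtype stored in the checkpoint config,
    # so the script no longer needs to import torch just to name a dtype.
    cfg = AutoConfig.from_pretrained("meta-llama/Llama-4-Scout-17B-16E-Instruct")
    print(cfg.torch_dtype)  # typically torch.bfloat16 for recent checkpoints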
@@ -34,6 +31,16 @@ def run(
                 "re:.*shared_expert.*",
             ]
         )
+    elif model_name == "meta-llama/Llama-4-Scout-17B-16E-Instruct":
+        ignore_list.extend(
+            [
+                "re:.*self_attn",
+                "re:.*router",
+                "re:.*vision_model.*",
+                "re:.*multi_modal_projector.*",
+                "Llama4TextAttention",
+            ]
+        )
 
     if quant_type == "fp8":
         # Configure the quantization algorithm and scheme.
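For context on where the new ignore patterns land: in the stock llm-compressor one-shot flow, ignore_list is handed to QuantizationModifier, so matched modules (attention, MoE router, vision tower, projector) are skipped by the quantizer and kept in the checkpoint dtype. A minimal sketch under that assumption; the FP8_DYNAMIC scheme is illustrative and this is not the exact script body from this PR:

    from llmcompressor import oneshot
    from llmcompressor.modifiers.quantization import QuantizationModifier
    from transformers import AutoModelForCausalLM

    model_name = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")

    # "re:"-prefixed entries are regexes over module names; bare entries such
    # as Llama4TextAttention match module class names.
    ignore_list = [
        "re:.*self_attn",
        "re:.*router",
        "re:.*vision_model.*",
        "re:.*multi_modal_projector.*",
        "Llama4TextAttention",
    ]

    recipe = QuantizationModifier(
        targets="Linear",      # quantize every Linear module...
        scheme="FP8_DYNAMIC",  # ...to FP8 with dynamic activation scales
        ignore=ignore_list,    # ...except modules matching these patterns
    )
    oneshot(model=model, recipe=recipe)  # applies the recipe to the model in place

Skipping the router and attention keeps the numerically sensitive parts of an MoE checkpoint at full precision, while the bulk of the parameters (the expert MLPs) are still quantized.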