1 file changed: +12 −5
@@ -73,14 +73,21 @@ def get_quantization_config(args):
     expert_fqn_to_config = {}
     # TODO(future PR): this is annoying, I should be able to use a regex here
     for layer_idx in range(24):
-        for expert_idx in range(60):
-            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.gate_proj"] = single_config
-            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.up_proj"] = single_config
-            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.down_proj"] = single_config
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.q_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.k_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.v_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.o_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.gate"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.gate_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.up_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.down_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert_gate"] = None
+    expert_fqn_to_config["lm_head"] = None
     module_fqn_to_config = ModuleFqnToConfig({
-        "_default": None,
+        "_default": single_config,
         **expert_fqn_to_config,
     })
+
     return TorchAoConfig(
         quant_type=module_fqn_to_config,
     )
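With `_default` now set to `single_config`, every module not named explicitly (notably the routed expert projections under `model.layers.*.mlp.experts.*`) gets quantized, while the `None` entries opt the attention projections, the MoE router (`mlp.gate`), the shared-expert MLP, and `lm_head` out of quantization. A minimal usage sketch of the resulting config, assuming `single_config` is a torchao `AOBaseConfig` instance defined earlier in this script (not shown in the hunk); the checkpoint name is hypothetical, chosen to match the 24-layer / `shared_expert` FQNs in the mapping:

```python
import torch
from transformers import AutoModelForCausalLM

# Build the module-FQN-to-config mapping patched above; `args` comes from
# this script's own argument parsing (not shown in the diff).
quantization_config = get_quantization_config(args)

# Load the model with torchao quantization applied per the mapping:
# everything falls under `_default` (quantized with single_config) except
# the modules explicitly mapped to None, which stay in high precision.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-MoE-A2.7B",  # hypothetical model id for illustration
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=quantization_config,
)
```

Keeping the router, attention projections, and `lm_head` unquantized while quantizing only the expert weights is a common accuracy-preserving choice for MoE models, since those modules are small relative to the experts but sensitive to precision loss.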