@@ -121,14 +121,21 @@ def get_quantization_config(args):
         expert_fqn_to_config = {}
         # TODO(future PR): this is annoying, I should be able to use a regex here
         for layer_idx in range(24):
-            for expert_idx in range(60):
-                expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.gate_proj"] = single_config
-                expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.up_proj"] = single_config
-                expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.down_proj"] = single_config
+            expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.q_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.k_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.v_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.o_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.gate"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.gate_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.up_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.down_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert_gate"] = None
+        expert_fqn_to_config[f"lm_head"] = None
         module_fqn_to_config = ModuleFqnToConfig({
-            "_default": None,
+            "_default": single_config,
             **expert_fqn_to_config,
         })
+
         return TorchAoConfig(
             quant_type=module_fqn_to_config,
         )
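Before this change, `_default: None` left the whole model unquantized except the 60 routed experts' `gate_proj`/`up_proj`/`down_proj` in each of the 24 layers. The change inverts the mapping: `_default: single_config` quantizes everything, and the attention projections, the MoE router gate, the shared expert, and `lm_head` are explicitly mapped to `None` so they stay in high precision. A minimal sketch of an equivalent, less repetitive construction (the `SKIP_SUFFIXES` name and the comprehension are my own, not from the patch; the FQNs and the layer count come from the diff):

```python
# Sketch only: builds the same skip list without the repetition the
# TODO complains about. SKIP_SUFFIXES is a hypothetical name.
SKIP_SUFFIXES = (
    "self_attn.q_proj",
    "self_attn.k_proj",
    "self_attn.v_proj",
    "self_attn.o_proj",
    "mlp.gate",
    "mlp.shared_expert.gate_proj",
    "mlp.shared_expert.up_proj",
    "mlp.shared_expert.down_proj",
    "mlp.shared_expert_gate",
)
expert_fqn_to_config = {
    f"model.layers.{layer_idx}.{suffix}": None
    for layer_idx in range(24)
    for suffix in SKIP_SUFFIXES
}
expert_fqn_to_config["lm_head"] = None
```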
@@ -162,12 +169,18 @@ def get_quantization_config(args):
         expert_fqn_to_config = {}
         # TODO(future PR): this is annoying, I should be able to use a regex here
         for layer_idx in range(24):
-            for expert_idx in range(60):
-                expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.gate_proj"] = single_config
-                expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.up_proj"] = single_config
-                expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.down_proj"] = single_config
+            expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.q_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.k_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.v_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.o_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.gate"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.gate_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.up_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.down_proj"] = None
+            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert_gate"] = None
+        expert_fqn_to_config[f"lm_head"] = None
         module_fqn_to_config = ModuleFqnToConfig({
-            "_default": None,
+            "_default": single_config,
             **expert_fqn_to_config,
         })
         return TorchAoConfig(
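The second hunk makes the same flip in another branch of `get_quantization_config`, presumably for a different quantization scheme. With `ModuleFqnToConfig`, a module whose FQN maps to `None` is skipped, while `_default` applies to every module not listed. For context, a hedged usage sketch of how such a config is typically consumed; the checkpoint name is a guess based on the module layout (24 layers, `shared_expert`, `shared_expert_gate`), and this assumes a transformers version whose `TorchAoConfig` accepts a torchao `ModuleFqnToConfig` as `quant_type`:

```python
# Hedged usage sketch, not from the patch. Assumes transformers with
# torchao integration installed; the model id is a guess that matches
# the FQNs in the diff above.
import torch
from transformers import AutoModelForCausalLM

quantization_config = get_quantization_config(args)  # the function being patched
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-MoE-A2.7B",  # hypothetical: a 24-layer MoE with shared_expert modules
    torch_dtype=torch.bfloat16,
    device_map="cuda",
    quantization_config=quantization_config,
)
```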