Commit 717e6e2

refactor torchao qwen module filtering
Summary: make the module filtering opt-out instead of opt-in, to match llm-compressor more closely

Test Plan:
Reviewers:
Subscribers:
Tasks:
Tags:
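For context, here is a minimal sketch of the opt-out pattern this commit switches to, assuming a recent torchao and transformers. The base config choice (Float8DynamicActivationFloat8WeightConfig) and the example FQNs are illustrative assumptions, not taken from this script:

```python
# Opt-out filtering sketch: "_default" applies a quantization config to every
# module, and FQNs mapped to None are explicitly skipped. The specific base
# config below is an assumption for illustration.
from torchao.quantization import (
    Float8DynamicActivationFloat8WeightConfig,
    ModuleFqnToConfig,
)
from transformers import TorchAoConfig

single_config = Float8DynamicActivationFloat8WeightConfig()

module_fqn_to_config = ModuleFqnToConfig({
    "_default": single_config,                 # quantize everything by default
    "model.layers.0.self_attn.q_proj": None,   # ...except modules opted out
    "lm_head": None,
})
quant_config = TorchAoConfig(quant_type=module_fqn_to_config)
```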
1 parent ca98462 commit 717e6e2

File tree

1 file changed (+12 -5 lines)
hf_torchao_vllm/quantize_hf_model_with_torchao.py

Lines changed: 12 additions & 5 deletions
```diff
@@ -73,14 +73,21 @@ def get_quantization_config(args):
     expert_fqn_to_config = {}
     # TODO(future PR): this is annoying, I should be able to use a regex here
     for layer_idx in range(24):
-        for expert_idx in range(60):
-            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.gate_proj"] = single_config
-            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.up_proj"] = single_config
-            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.down_proj"] = single_config
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.q_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.k_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.v_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.o_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.gate"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.gate_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.up_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.down_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert_gate"] = None
+    expert_fqn_to_config[f"lm_head"] = None
     module_fqn_to_config = ModuleFqnToConfig({
-        "_default": None,
+        "_default": single_config,
         **expert_fqn_to_config,
     })
+
     return TorchAoConfig(
         quant_type=module_fqn_to_config,
     )
```
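As a usage sketch (not part of this commit): the resulting config is typically passed to transformers at load time. The checkpoint id below is an assumption inferred from the hardcoded shapes (24 layers, 60 experts, shared expert), which match Qwen1.5-MoE-A2.7B; it is not stated in the diff.

```python
# Hedged usage sketch: load a model with the TorchAoConfig produced above
# (quant_config from the earlier sketch). The checkpoint id is assumed.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-MoE-A2.7B",          # assumed checkpoint; 24 layers, 60 experts
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=quant_config,  # quantizes everything except the opted-out FQNs
)
```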
