
Commit b645db6

fix llmc example for llama 4
Summary: need to enable calibration to get the experts to quantize
Test Plan:
Reviewers:
Subscribers:
Tasks:
Tags:
1 parent 9de0416 commit b645db6

File tree
1 file changed: +2 -0 lines changed


hf_torchao_vllm/quantize_hf_model_with_llm_compressor.py

Lines changed: 2 additions & 0 deletions
@@ -3,6 +3,7 @@
 import fire
 from datasets import load_dataset
 from llmcompressor import oneshot
+from llmcompressor.modeling import replace_modules_for_calibration
 from llmcompressor.modifiers.quantization import QuantizationModifier
 from llmcompressor.utils import dispatch_for_generation

@@ -17,6 +18,7 @@ def run(

     # Load model.
     model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")
+    model = replace_modules_for_calibration(model)
     print(model)
     tokenizer = AutoTokenizer.from_pretrained(model_name)

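For context, here is a minimal sketch of how the patched script fits together after this change. The default model name, quantization scheme, and save path below are illustrative assumptions, not values taken from the actual file; only the replace_modules_for_calibration call reflects this commit.

# Minimal sketch of the calibration-enabled flow (illustrative; the model name,
# recipe, and save directory are placeholders, not copied from the repository).
import fire
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmcompressor import oneshot
from llmcompressor.modeling import replace_modules_for_calibration
from llmcompressor.modifiers.quantization import QuantizationModifier


def run(model_name: str = "meta-llama/Llama-4-Scout-17B-16E-Instruct"):
    # Load model.
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")

    # The fix from this commit: rewrite the fused MoE blocks into per-expert
    # modules so the expert weights are visible to the quantization pass.
    model = replace_modules_for_calibration(model)

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Placeholder recipe: quantize all Linear layers, skip the LM head.
    recipe = QuantizationModifier(
        targets="Linear", scheme="FP8_DYNAMIC", ignore=["lm_head"]
    )

    # Apply the recipe and save the compressed checkpoint
    # (following llm-compressor's documented save pattern).
    oneshot(model=model, recipe=recipe)
    save_dir = model_name.split("/")[-1] + "-FP8-Dynamic"
    model.save_pretrained(save_dir, save_compressed=True)
    tokenizer.save_pretrained(save_dir)


if __name__ == "__main__":
    fire.Fire(run)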