We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9de0416 commit b645db6 (Copy full SHA for b645db6)
hf_torchao_vllm/quantize_hf_model_with_llm_compressor.py
@@ -3,6 +3,7 @@
3
import fire
4
from datasets import load_dataset
5
from llmcompressor import oneshot
6
+from llmcompressor.modeling import replace_modules_for_calibration
7
from llmcompressor.modifiers.quantization import QuantizationModifier
8
from llmcompressor.utils import dispatch_for_generation
9
@@ -17,6 +18,7 @@ def run(
17
18
19
# Load model.
20
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")
21
+ model = replace_modules_for_calibration(model)
22
print(model)
23
tokenizer = AutoTokenizer.from_pretrained(model_name)
24
0 commit comments