Commit 57e6fcf

Fix convert script cannot generate bf16 weights (#104)
fix convert script to generate bf16 weights
1 parent: 2880904

File tree

2 files changed: +3 -2 lines changed


README.md

Lines changed: 2 additions & 1 deletion
@@ -70,7 +70,8 @@ Need to manually modify the `config.json` in the checkpoint folder to make it a
 export input_ckpt_dir=Original llama weights directory
 export output_ckpt_dir=The output directory
 export model_name="llama-3" # or "llama-2", "gemma"
-export quantize_type="int8_per_channel" # Available quantize types: {"int8", "int4"} x {"per_channel", "blockwise"}; setting this will quantize the weights
+export quantize_weights=True # Whether to quantize weights
+export quantize_type="int8_per_channel" # "quantize_weights" needs to be turned on. Available quantize types: {"int8", "int4"} x {"per_channel", "blockwise"}; "int8_per_channel" is the default if not specified.
 python -m convert_checkpoints --model_name=$model_name --input_checkpoint_dir=$input_ckpt_dir --output_checkpoint_dir=$output_ckpt_dir --quantize_type=$quantize_type
 ```
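For reference, a minimal usage sketch of the bf16 path this commit unblocks. It assumes the script exposes a `--quantize_weights` boolean flag backing `FLAGS.quantize_weights` (absl-style boolean flags accept `--quantize_weights=False`); the command-line spelling of the flag is an assumption, since the diff does not show the flag definition:

```
export quantize_weights=False # Leave weights unquantized (bf16)
python -m convert_checkpoints --model_name=$model_name --input_checkpoint_dir=$input_ckpt_dir --output_checkpoint_dir=$output_ckpt_dir --quantize_weights=$quantize_weights
```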

convert_checkpoints.py

Lines changed: 1 addition & 1 deletion
@@ -391,7 +391,7 @@ def main(argv) -> None:
       llama_model.Transformer.get_quantized_embedding_weight_to_scaler_map()
   )
 
-  if FLAGS.quantize_type:
+  if FLAGS.quantize_weights:
     quantize_num_bits = 8 if "int8" in FLAGS.quantize_type else 4
     is_blockwise = "blockwise" in FLAGS.quantize_type
     weight_axis = lambda x: 0 if x in quantize_embedding_weight_map else 1
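Why the one-line change fixes bf16 output: per the updated README, `quantize_type` defaults to "int8_per_channel", so `if FLAGS.quantize_type:` was always truthy and the converter always quantized, leaving the unquantized (bf16) path unreachable. Gating on the boolean `quantize_weights` restores it. A minimal, self-contained sketch of the pattern, assuming absl-style flags (names mirror convert_checkpoints.py; defaults and prints are illustrative, not the script's actual behavior):

```python
# Sketch of the flag-gating pattern; absl assumed, output illustrative.
from absl import app
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_bool("quantize_weights", False, "Whether to quantize weights.")
flags.DEFINE_string(
    "quantize_type",
    "int8_per_channel",  # Non-empty default: always truthy, hence the old bug.
    'One of {"int8", "int4"} x {"per_channel", "blockwise"}.',
)


def main(argv):
  del argv  # Unused.
  # The old check `if FLAGS.quantize_type:` never took the else-branch,
  # because the string flag has a non-empty default.
  if FLAGS.quantize_weights:
    quantize_num_bits = 8 if "int8" in FLAGS.quantize_type else 4
    is_blockwise = "blockwise" in FLAGS.quantize_type
    print(f"quantizing: bits={quantize_num_bits}, blockwise={is_blockwise}")
  else:
    print("keeping weights unquantized (bf16)")


if __name__ == "__main__":
  app.run(main)
```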
