File tree: 2 files changed (+3 −3 lines changed)

@@ -944,7 +944,7 @@ class llama_context_params(ctypes.Structure):
944  944   # int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
945  945   # enum llama_ftype ftype; // quantize to this llama_ftype
946  946   # enum ggml_type output_tensor_type; // output tensor type
947       -# enum ggml_type token_embedding_type; // itoken embeddings tensor type
     947  +# enum ggml_type token_embedding_type; // token embeddings tensor type
948  948   # bool allow_requantize; // allow quantizing non-f32/f16 tensors
949  949   # bool quantize_output_tensor; // quantize output.weight
950  950   # bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
@@ -960,7 +960,7 @@ class llama_model_quantize_params(ctypes.Structure):
960  960       nthread (int): number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
961  961       ftype (int): quantize to this llama_ftype
962  962       output_tensor_type (int): output tensor type
963       -    token_embedding_type (int): itoken embeddings tensor type
     963  +    token_embedding_type (int): token embeddings tensor type
964  964       allow_requantize (bool): allow quantizing non-f32/f16 tensors
965  965       quantize_output_tensor (bool): quantize output.weight
966  966       only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
0 commit comments