@@ -668,30 +668,36 @@ class llama_context_params(ctypes.Structure):
 
 # // model quantization parameters
 # typedef struct llama_model_quantize_params {
-#     int32_t nthread;             // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
-#     enum llama_ftype ftype;      // quantize to this llama_ftype
-#     bool allow_requantize;       // allow quantizing non-f32/f16 tensors
-#     bool quantize_output_tensor; // quantize output.weight
-#     bool only_copy;              // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
-#     bool pure;                   // quantize all tensors to the default type
-#     void * imatrix;              // pointer to importance matrix data
+#     int32_t nthread;                     // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
+#     enum llama_ftype ftype;              // quantize to this llama_ftype
+#     enum ggml_type output_tensor_type;   // output tensor type
+#     enum ggml_type token_embedding_type; // token embeddings tensor type
+#     bool allow_requantize;               // allow quantizing non-f32/f16 tensors
+#     bool quantize_output_tensor;         // quantize output.weight
+#     bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
+#     bool pure;                           // quantize all tensors to the default type
+#     void * imatrix;                      // pointer to importance matrix data
 # } llama_model_quantize_params;
 class llama_model_quantize_params(ctypes.Structure):
     """Parameters for llama_model_quantize
 
     Attributes:
         nthread (int): number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
         ftype (int): quantize to this llama_ftype
+        output_tensor_type (int): output tensor type
+        token_embedding_type (int): token embeddings tensor type
         allow_requantize (bool): allow quantizing non-f32/f16 tensors
         quantize_output_tensor (bool): quantize output.weight
         only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
         pure (bool): quantize all tensors to the default type
-        imatrix (ctypes.ctypes.c_void_p): pointer to importance matrix data
+        imatrix (ctypes.c_void_p): pointer to importance matrix data
     """
 
     _fields_ = [
         ("nthread", ctypes.c_int32),
         ("ftype", ctypes.c_int),
+        ("output_tensor_type", ctypes.c_int),
+        ("token_embedding_type", ctypes.c_int),
         ("allow_requantize", ctypes.c_bool),
         ("quantize_output_tensor", ctypes.c_bool),
         ("only_copy", ctypes.c_bool),
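
For orientation (not part of the diff itself), here is a minimal sketch of how the extended struct might be driven from Python. It assumes the llama_cpp package is importable and that llama_model_quantize_default_params(), LLAMA_FTYPE_MOSTLY_Q4_0 and llama_model_quantize() are exposed by the bindings; the model paths are placeholders.

import llama_cpp

# Start from the library defaults, then override only the fields of interest.
params = llama_cpp.llama_model_quantize_default_params()
params.nthread = 0                                 # <=0: use std::thread::hardware_concurrency()
params.ftype = llama_cpp.LLAMA_FTYPE_MOSTLY_Q4_0   # target llama_ftype (placeholder choice)
params.output_tensor_type = 1                      # new field: a ggml_type value (1 == GGML_TYPE_F16 in ggml.h)
params.token_embedding_type = 1                    # new field: ggml_type for the token embeddings tensor

# Hypothetical input/output paths; llama_model_quantize returns 0 on success.
ret = llama_cpp.llama_model_quantize(
    b"/models/ggml-model-f16.gguf",
    b"/models/ggml-model-q4_0.gguf",
    params,
)
assert ret == 0
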
@@ -2743,6 +2749,48 @@ def llama_beam_search(
 ): ...
 
 
+# /// @details Build a split GGUF final path for this chunk.
+# /// llama_split_path(split_path, sizeof(split_path), "/models/ggml-model-q4_0", 2, 4) => split_path = "/models/ggml-model-q4_0-00002-of-00004.gguf"
+# // Returns the split_path length.
+# LLAMA_API int llama_split_path(char * split_path, size_t maxlen, const char * path_prefix, int split_no, int split_count);
+@ctypes_function(
+    "llama_split_path",
+    [ctypes.c_char_p, ctypes.c_size_t, ctypes.c_char_p, ctypes.c_int, ctypes.c_int],
+    ctypes.c_int,
+)
+def llama_split_path(
+    split_path: bytes,
+    maxlen: Union[ctypes.c_size_t, int],
+    path_prefix: bytes,
+    split_no: Union[ctypes.c_int, int],
+    split_count: Union[ctypes.c_int, int],
+    /,
+) -> int:
+    """Build a split GGUF final path for this chunk."""
+    ...
+
+
+# /// @details Extract the path prefix from the split_path if and only if the split_no and split_count match.
+# /// llama_split_prefix(split_prefix, 64, "/models/ggml-model-q4_0-00002-of-00004.gguf", 2, 4) => split_prefix = "/models/ggml-model-q4_0"
+# // Returns the split_prefix length.
+# LLAMA_API int llama_split_prefix(char * split_prefix, size_t maxlen, const char * split_path, int split_no, int split_count);
+@ctypes_function(
+    "llama_split_prefix",
+    [ctypes.c_char_p, ctypes.c_size_t, ctypes.c_char_p, ctypes.c_int, ctypes.c_int],
+    ctypes.c_int,
+)
+def llama_split_prefix(
+    split_prefix: bytes,
+    maxlen: Union[ctypes.c_size_t, int],
+    split_path: bytes,
+    split_no: Union[ctypes.c_int, int],
+    split_count: Union[ctypes.c_int, int],
+    /,
+) -> int:
+    """Extract the path prefix from the split_path if and only if the split_no and split_count match."""
+    ...
+
+
 # Performance information
 
 
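
As a rough usage sketch (not part of this commit), the two new helpers could be called from Python as below. The 1024-byte buffer size and the model prefix are made up for illustration; the expected strings come from the header comments quoted in the diff.

import ctypes
import llama_cpp

# Build the shard file name "<prefix>-00002-of-00004.gguf" into a writable buffer.
buf = ctypes.create_string_buffer(1024)
n = llama_cpp.llama_split_path(buf, ctypes.sizeof(buf), b"/models/ggml-model-q4_0", 2, 4)
print(n, buf.value)  # expected: b"/models/ggml-model-q4_0-00002-of-00004.gguf"

# Recover the prefix from a shard path, provided split_no and split_count match.
pre = ctypes.create_string_buffer(1024)
n = llama_cpp.llama_split_prefix(pre, ctypes.sizeof(pre), buf.value, 2, 4)
print(n, pre.value)  # expected: b"/models/ggml-model-q4_0"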