From 70f568ad17423290928bb0814a916f1ce16bce4c Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Sun, 9 Nov 2025 19:31:23 +0800 Subject: [PATCH 01/11] common: introduce auto sampling params from metadata Signed-off-by: Aaron Teo --- common/arg.cpp | 9 +++++++++ common/common.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ common/common.h | 13 +++++++++++++ 3 files changed, 63 insertions(+) diff --git a/common/arg.cpp b/common/arg.cpp index a570810281499..97d896ea314c0 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1261,6 +1261,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params & params, const std::string & value) { params.sampling.temp = std::stof(value); params.sampling.temp = std::max(params.sampling.temp, 0.0f); + params.sampling.sampling_mask |= common_params_sampling::SAMPLING_MASK_BITS_TEMP; } ).set_sparam()); add_opt(common_arg( @@ -1268,6 +1269,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex string_format("top-k sampling (default: %d, 0 = disabled)", params.sampling.top_k), [](common_params & params, int value) { params.sampling.top_k = value; + params.sampling.sampling_mask |= common_params_sampling::SAMPLING_MASK_BITS_TOP_K; } ).set_sparam()); add_opt(common_arg( @@ -1275,6 +1277,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex string_format("top-p sampling (default: %.1f, 1.0 = disabled)", (double)params.sampling.top_p), [](common_params & params, const std::string & value) { params.sampling.top_p = std::stof(value); + params.sampling.sampling_mask |= common_params_sampling::SAMPLING_MASK_BITS_TOP_P; } ).set_sparam()); add_opt(common_arg( @@ -1282,6 +1285,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex string_format("min-p sampling (default: %.1f, 0.0 = disabled)", (double)params.sampling.min_p), [](common_params & params, const std::string & value) { params.sampling.min_p = 
std::stof(value); + params.sampling.sampling_mask |= common_params_sampling::SAMPLING_MASK_BITS_MIN_P; } ).set_sparam()); add_opt(common_arg( @@ -1321,6 +1325,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } params.sampling.penalty_last_n = value; params.sampling.n_prev = std::max(params.sampling.n_prev, params.sampling.penalty_last_n); + params.sampling.sampling_mask |= common_params_sampling::SAMPLING_MASK_BITS_PENALTY_LAST_N; } ).set_sparam()); add_opt(common_arg( @@ -1328,6 +1333,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex string_format("penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)", (double)params.sampling.penalty_repeat), [](common_params & params, const std::string & value) { params.sampling.penalty_repeat = std::stof(value); + params.sampling.sampling_mask |= common_params_sampling::SAMPLING_MASK_BITS_PENALTY_REPEAT; } ).set_sparam()); add_opt(common_arg( @@ -1425,6 +1431,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex "(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", params.sampling.mirostat), [](common_params & params, int value) { params.sampling.mirostat = value; + params.sampling.sampling_mask |= common_params_sampling::SAMPLING_MASK_BITS_MIROSTAT; } ).set_sparam()); add_opt(common_arg( @@ -1432,6 +1439,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex string_format("Mirostat learning rate, parameter eta (default: %.1f)", (double)params.sampling.mirostat_eta), [](common_params & params, const std::string & value) { params.sampling.mirostat_eta = std::stof(value); + params.sampling.sampling_mask |= common_params_sampling::SAMPLING_MASK_BITS_MIROSTAT_ETA; } ).set_sparam()); add_opt(common_arg( @@ -1439,6 +1447,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex string_format("Mirostat target entropy, parameter tau (default: %.1f)", 
(double)params.sampling.mirostat_tau), [](common_params & params, const std::string & value) { params.sampling.mirostat_tau = std::stof(value); + params.sampling.sampling_mask |= common_params_sampling::SAMPLING_MASK_BITS_MIROSTAT_TAU; } ).set_sparam()); add_opt(common_arg( diff --git a/common/common.cpp b/common/common.cpp index a8d709ab1d050..51071fb1fc967 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -946,6 +946,45 @@ std::vector fs_list_files(const std::string & path) { // Model utils // +static inline void common_init_sampler_from_model( + const llama_model * model, + common_params_sampling & sparams) { + + const uint16_t mask = sparams.sampling_mask; + + auto get_int32 = [&](const char * key, int32_t & dst, uint16_t user_override) { + if (mask & user_override) return; + + char buf[64] = {0}; + if (llama_model_meta_val_str(model, key, buf, sizeof(buf)) > 0) { + char * end = nullptr; + int32_t v = strtol(buf, &end, 10); + if (end && end != buf) dst = v; + } + }; + + auto get_float = [&](const char * key, float & dst, uint16_t user_override) { + if (mask & user_override) return; + + char buf[128] = {0}; + if (llama_model_meta_val_str(model, key, buf, sizeof(buf)) > 0) { + char * end = nullptr; + float v = strtof(buf, &end); + if (end && end != buf) dst = v; + } + }; + + get_int32("general.sampler.top_k", sparams.top_k, common_params_sampling::SAMPLING_MASK_BITS_TOP_K); + get_float("general.sampler.top_p", sparams.top_p, common_params_sampling::SAMPLING_MASK_BITS_TOP_P); + get_float("general.sampler.min_p", sparams.min_p, common_params_sampling::SAMPLING_MASK_BITS_MIN_P); + get_float("general.sampler.temp", sparams.temp, common_params_sampling::SAMPLING_MASK_BITS_TEMP); + get_int32("general.sampler.penalty_last_n", sparams.penalty_last_n, common_params_sampling::SAMPLING_MASK_BITS_PENALTY_LAST_N); + get_float("general.sampler.penalty_repeat", sparams.penalty_repeat, common_params_sampling::SAMPLING_MASK_BITS_PENALTY_REPEAT); + 
get_int32("general.sampler.mirostat", sparams.mirostat, common_params_sampling::SAMPLING_MASK_BITS_MIROSTAT); + get_float("general.sampler.mirostat_tau", sparams.mirostat_tau, common_params_sampling::SAMPLING_MASK_BITS_MIROSTAT_TAU); + get_float("general.sampler.mirostat_eta", sparams.mirostat_eta, common_params_sampling::SAMPLING_MASK_BITS_MIROSTAT_ETA); +} + struct common_init_result common_init_from_params(common_params & params) { common_init_result iparams; auto mparams = common_model_params_to_llama(params); @@ -957,6 +996,8 @@ struct common_init_result common_init_from_params(common_params & params) { return iparams; } + common_init_sampler_from_model(model, params.sampling); + const llama_vocab * vocab = llama_model_get_vocab(model); auto cparams = common_context_params_to_llama(params); diff --git a/common/common.h b/common/common.h index 8540725aaa476..c0d5d86127b81 100644 --- a/common/common.h +++ b/common/common.h @@ -165,6 +165,19 @@ struct common_params_sampling { bool no_perf = false; // disable performance metrics bool timing_per_token = false; + uint16_t sampling_mask = 0; // bitfield to track user-specified samplers + enum sampling_mask_bits : uint16_t { + SAMPLING_MASK_BITS_TOP_K = 1 << 0, + SAMPLING_MASK_BITS_TOP_P = 1 << 1, + SAMPLING_MASK_BITS_MIN_P = 1 << 2, + SAMPLING_MASK_BITS_TEMP = 1 << 3, + SAMPLING_MASK_BITS_PENALTY_LAST_N = 1 << 4, + SAMPLING_MASK_BITS_PENALTY_REPEAT = 1 << 5, + SAMPLING_MASK_BITS_MIROSTAT = 1 << 6, + SAMPLING_MASK_BITS_MIROSTAT_TAU = 1 << 7, + SAMPLING_MASK_BITS_MIROSTAT_ETA = 1 << 8, + }; + std::vector dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY From 7de014e4e107ed8d429271cdfa96bc83bbda266a Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Sun, 9 Nov 2025 20:10:25 +0800 Subject: [PATCH 02/11] gguf-py: introduce new kv for conversion scripts Signed-off-by: Aaron Teo --- gguf-py/gguf/constants.py | 11 ++++++++++ gguf-py/gguf/gguf_writer.py | 27 ++++++++++++++++++++++++ 
gguf-py/gguf/metadata.py | 41 +++++++++++++++++++++++++++++++++++++ src/llama-arch.cpp | 35 +++++++++++++++++++------------ src/llama-arch.h | 9 ++++++++ 5 files changed, 110 insertions(+), 13 deletions(-) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 6b4b6c5ab075d..b1b22581d9533 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -25,6 +25,17 @@ class General: ALIGNMENT = "general.alignment" FILE_TYPE = "general.file_type" + # Recommended Sampler Parameters + SAMPLER_TOP_K = "general.sampler.top_k" + SAMPLER_TOP_P = "general.sampler.top_p" + SAMPLER_MIN_P = "general.sampler.min_p" + SAMPLER_TEMP = "general.sampler.temp" + SAMPLER_PENALTY_LAST_N = "general.sampler.penalty_last_n" + SAMPLER_PENALTY_REPEAT = "general.sampler.penalty_repeat" + SAMPLER_MIROSTAT = "general.sampler.mirostat" + SAMPLER_MIROSTAT_TAU = "general.sampler.mirostat_tau" + SAMPLER_MIROSTAT_ETA = "general.sampler.mirostat_eta" + # Authorship Metadata NAME = "general.name" AUTHOR = "general.author" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index a051daeeb1341..23567281d1503 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -496,6 +496,33 @@ def add_custom_alignment(self, alignment: int) -> None: def add_file_type(self, ftype: int) -> None: self.add_uint32(Keys.General.FILE_TYPE, ftype) + def add_sampler_top_k(self, top_k: int) -> None: + self.add_int32(Keys.General.SAMPLER_TOP_K, top_k) + + def add_sampler_top_p(self, top_p: float) -> None: + self.add_float32(Keys.General.SAMPLER_TOP_P, top_p) + + def add_sampler_min_p(self, min_p: float) -> None: + self.add_float32(Keys.General.SAMPLER_MIN_P, min_p) + + def add_sampler_temp(self, temp: float) -> None: + self.add_float32(Keys.General.SAMPLER_TEMP, temp) + + def add_sampler_penalty_last_n(self, penalty_last_n: int) -> None: + self.add_int32(Keys.General.SAMPLER_PENALTY_LAST_N, penalty_last_n) + + def add_sampler_penalty_repeat(self, 
penalty_repeat: float) -> None: + self.add_float32(Keys.General.SAMPLER_PENALTY_REPEAT, penalty_repeat) + + def add_sampler_mirostat(self, mirostat: int) -> None: + self.add_int32(Keys.General.SAMPLER_MIROSTAT, mirostat) + + def add_sampler_mirostat_tau(self, mirostat_tau: float) -> None: + self.add_float32(Keys.General.SAMPLER_MIROSTAT_TAU, mirostat_tau) + + def add_sampler_mirostat_eta(self, mirostat_eta: float) -> None: + self.add_float32(Keys.General.SAMPLER_MIROSTAT_ETA, mirostat_eta) + def add_name(self, name: str) -> None: self.add_string(Keys.General.NAME, name) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 67efedbdbc564..249eddbc9c763 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -17,6 +17,17 @@ @dataclass class Metadata: + # Recommended Sampler Parameters to be written to GGUF KV Store + sampler_top_k: Optional[int] = None + sampler_top_p: Optional[float] = None + sampler_min_p: Optional[float] = None + sampler_temp: Optional[float] = None + sampler_penalty_last_n: Optional[int] = None + sampler_penalty_repeat: Optional[float] = None + sampler_mirostat: Optional[int] = None + sampler_mirostat_tau: Optional[float] = None + sampler_mirostat_eta: Optional[float] = None + # Authorship Metadata to be written to GGUF KV Store name: Optional[str] = None author: Optional[str] = None @@ -63,6 +74,16 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) + metadata.sampler_top_k = metadata_override.get(Keys.General.SAMPLER_TOP_K, metadata.sampler_top_k) + metadata.sampler_top_p = metadata_override.get(Keys.General.SAMPLER_TOP_P, metadata.sampler_top_p) + metadata.sampler_min_p = metadata_override.get(Keys.General.SAMPLER_MIN_P, metadata.sampler_min_p) + metadata.sampler_temp = metadata_override.get(Keys.General.SAMPLER_TEMP, metadata.sampler_temp) + 
metadata.sampler_penalty_last_n = metadata_override.get(Keys.General.SAMPLER_PENALTY_LAST_N, metadata.sampler_penalty_last_n) + metadata.sampler_penalty_repeat = metadata_override.get(Keys.General.SAMPLER_PENALTY_REPEAT, metadata.sampler_penalty_repeat) + metadata.sampler_mirostat = metadata_override.get(Keys.General.SAMPLER_MIROSTAT, metadata.sampler_mirostat) + metadata.sampler_mirostat_tau = metadata_override.get(Keys.General.SAMPLER_MIROSTAT_TAU, metadata.sampler_mirostat_tau) + metadata.sampler_mirostat_eta = metadata_override.get(Keys.General.SAMPLER_MIROSTAT_ETA, metadata.sampler_mirostat_eta) + metadata.name = metadata_override.get(Keys.General.NAME, metadata.name) metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author) metadata.version = metadata_override.get(Keys.General.VERSION, metadata.version) @@ -546,6 +567,26 @@ def use_array_model_card_metadata(metadata_key: str, model_card_key: str): def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter): assert self.name is not None + + if self.sampler_top_k is not None: + gguf_writer.add_sampler_top_k(self.sampler_top_k) + if self.sampler_top_p is not None: + gguf_writer.add_sampler_top_p(self.sampler_top_p) + if self.sampler_min_p is not None: + gguf_writer.add_sampler_min_p(self.sampler_min_p) + if self.sampler_temp is not None: + gguf_writer.add_sampler_temp(self.sampler_temp) + if self.sampler_penalty_last_n is not None: + gguf_writer.add_sampler_penalty_last_n(self.sampler_penalty_last_n) + if self.sampler_penalty_repeat is not None: + gguf_writer.add_sampler_penalty_repeat(self.sampler_penalty_repeat) + if self.sampler_mirostat is not None: + gguf_writer.add_sampler_mirostat(self.sampler_mirostat) + if self.sampler_mirostat_tau is not None: + gguf_writer.add_sampler_mirostat_tau(self.sampler_mirostat_tau) + if self.sampler_mirostat_eta is not None: + gguf_writer.add_sampler_mirostat_eta(self.sampler_mirostat_eta) + gguf_writer.add_name(self.name) if self.author is not None: diff 
--git a/src/llama-arch.cpp b/src/llama-arch.cpp index b7642b568dffb..2c30068712f74 100644 --- a/src/llama-arch.cpp +++ b/src/llama-arch.cpp @@ -112,19 +112,28 @@ static const std::map LLM_ARCH_NAMES = { }; static const std::map LLM_KV_NAMES = { - { LLM_KV_GENERAL_TYPE, "general.type" }, - { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" }, - { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" }, - { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" }, - { LLM_KV_GENERAL_FILE_TYPE, "general.file_type" }, - { LLM_KV_GENERAL_NAME, "general.name" }, - { LLM_KV_GENERAL_AUTHOR, "general.author" }, - { LLM_KV_GENERAL_VERSION, "general.version" }, - { LLM_KV_GENERAL_URL, "general.url" }, - { LLM_KV_GENERAL_DESCRIPTION, "general.description" }, - { LLM_KV_GENERAL_LICENSE, "general.license" }, - { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" }, - { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" }, + { LLM_KV_GENERAL_TYPE, "general.type" }, + { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" }, + { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" }, + { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" }, + { LLM_KV_GENERAL_FILE_TYPE, "general.file_type" }, + { LLM_KV_GENERAL_SAMPLER_TOP_K, "general.sampler.top_k" }, + { LLM_KV_GENERAL_SAMPLER_TOP_P, "general.sampler.top_p" }, + { LLM_KV_GENERAL_SAMPLER_MIN_P, "general.sampler.min_p" }, + { LLM_KV_GENERAL_SAMPLER_TEMP, "general.sampler.temperature" }, + { LLM_KV_GENERAL_SAMPLER_PENALTY_LAST_N, "general.sampler.penalty_last_n" }, + { LLM_KV_GENERAL_SAMPLER_PENALTY_REPEAT, "general.sampler.penalty_repeat" }, + { LLM_KV_GENERAL_SAMPLER_MIROSTAT, "general.sampler.mirostat" }, + { LLM_KV_GENERAL_SAMPLER_MIROSTAT_TAU, "general.sampler.mirostat_tau" }, + { LLM_KV_GENERAL_SAMPLER_MIROSTAT_ETA, "general.sampler.mirostat_eta" }, + { LLM_KV_GENERAL_NAME, "general.name" }, + { LLM_KV_GENERAL_AUTHOR, "general.author" }, + { LLM_KV_GENERAL_VERSION, "general.version" }, + { 
LLM_KV_GENERAL_URL, "general.url" }, + { LLM_KV_GENERAL_DESCRIPTION, "general.description" }, + { LLM_KV_GENERAL_LICENSE, "general.license" }, + { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" }, + { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" }, { LLM_KV_VOCAB_SIZE, "%s.vocab_size" }, { LLM_KV_CONTEXT_LENGTH, "%s.context_length" }, diff --git a/src/llama-arch.h b/src/llama-arch.h index a769dd1e85741..8ae8eed093e86 100644 --- a/src/llama-arch.h +++ b/src/llama-arch.h @@ -121,6 +121,15 @@ enum llm_kv { LLM_KV_GENERAL_QUANTIZATION_VERSION, LLM_KV_GENERAL_ALIGNMENT, LLM_KV_GENERAL_FILE_TYPE, + LLM_KV_GENERAL_SAMPLER_TOP_K, + LLM_KV_GENERAL_SAMPLER_TOP_P, + LLM_KV_GENERAL_SAMPLER_MIN_P, + LLM_KV_GENERAL_SAMPLER_TEMP, + LLM_KV_GENERAL_SAMPLER_PENALTY_LAST_N, + LLM_KV_GENERAL_SAMPLER_PENALTY_REPEAT, + LLM_KV_GENERAL_SAMPLER_MIROSTAT, + LLM_KV_GENERAL_SAMPLER_MIROSTAT_TAU, + LLM_KV_GENERAL_SAMPLER_MIROSTAT_ETA, LLM_KV_GENERAL_NAME, LLM_KV_GENERAL_AUTHOR, LLM_KV_GENERAL_VERSION, From c41bb285831f1aa0bde2d92663a59c954ec21428 Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Sun, 9 Nov 2025 20:39:24 +0800 Subject: [PATCH 03/11] gguf-py: fix formatting Signed-off-by: Aaron Teo --- gguf-py/gguf/metadata.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 249eddbc9c763..b9c9697c8086c 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -74,15 +74,15 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) - metadata.sampler_top_k = metadata_override.get(Keys.General.SAMPLER_TOP_K, metadata.sampler_top_k) - metadata.sampler_top_p = metadata_override.get(Keys.General.SAMPLER_TOP_P, metadata.sampler_top_p) - metadata.sampler_min_p = metadata_override.get(Keys.General.SAMPLER_MIN_P, 
metadata.sampler_min_p) - metadata.sampler_temp = metadata_override.get(Keys.General.SAMPLER_TEMP, metadata.sampler_temp) + metadata.sampler_top_k = metadata_override.get(Keys.General.SAMPLER_TOP_K, metadata.sampler_top_k) + metadata.sampler_top_p = metadata_override.get(Keys.General.SAMPLER_TOP_P, metadata.sampler_top_p) + metadata.sampler_min_p = metadata_override.get(Keys.General.SAMPLER_MIN_P, metadata.sampler_min_p) + metadata.sampler_temp = metadata_override.get(Keys.General.SAMPLER_TEMP, metadata.sampler_temp) metadata.sampler_penalty_last_n = metadata_override.get(Keys.General.SAMPLER_PENALTY_LAST_N, metadata.sampler_penalty_last_n) - metadata.sampler_penalty_repeat = metadata_override.get(Keys.General.SAMPLER_PENALTY_REPEAT, metadata.sampler_penalty_repeat) - metadata.sampler_mirostat = metadata_override.get(Keys.General.SAMPLER_MIROSTAT, metadata.sampler_mirostat) - metadata.sampler_mirostat_tau = metadata_override.get(Keys.General.SAMPLER_MIROSTAT_TAU, metadata.sampler_mirostat_tau) - metadata.sampler_mirostat_eta = metadata_override.get(Keys.General.SAMPLER_MIROSTAT_ETA, metadata.sampler_mirostat_eta) + metadata.sampler_penalty_repeat = metadata_override.get(Keys.General.SAMPLER_PENALTY_REPEAT, metadata.sampler_penalty_repeat) + metadata.sampler_mirostat = metadata_override.get(Keys.General.SAMPLER_MIROSTAT, metadata.sampler_mirostat) + metadata.sampler_mirostat_tau = metadata_override.get(Keys.General.SAMPLER_MIROSTAT_TAU, metadata.sampler_mirostat_tau) + metadata.sampler_mirostat_eta = metadata_override.get(Keys.General.SAMPLER_MIROSTAT_ETA, metadata.sampler_mirostat_eta) metadata.name = metadata_override.get(Keys.General.NAME, metadata.name) metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author) From caa7a039f16da11f43dd9c486755a95a9de93de7 Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Sun, 9 Nov 2025 20:40:29 +0800 Subject: [PATCH 04/11] gguf-py: fix more formatting issues Signed-off-by: Aaron Teo --- 
gguf-py/gguf/constants.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index b1b22581d9533..4e8e3a15763fe 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -26,15 +26,15 @@ class General: FILE_TYPE = "general.file_type" # Recommended Sampler Parameters - SAMPLER_TOP_K = "general.sampler.top_k" - SAMPLER_TOP_P = "general.sampler.top_p" - SAMPLER_MIN_P = "general.sampler.min_p" - SAMPLER_TEMP = "general.sampler.temp" - SAMPLER_PENALTY_LAST_N = "general.sampler.penalty_last_n" - SAMPLER_PENALTY_REPEAT = "general.sampler.penalty_repeat" - SAMPLER_MIROSTAT = "general.sampler.mirostat" - SAMPLER_MIROSTAT_TAU = "general.sampler.mirostat_tau" - SAMPLER_MIROSTAT_ETA = "general.sampler.mirostat_eta" + SAMPLER_TOP_K = "general.sampler.top_k" + SAMPLER_TOP_P = "general.sampler.top_p" + SAMPLER_MIN_P = "general.sampler.min_p" + SAMPLER_TEMP = "general.sampler.temp" + SAMPLER_PENALTY_LAST_N = "general.sampler.penalty_last_n" + SAMPLER_PENALTY_REPEAT = "general.sampler.penalty_repeat" + SAMPLER_MIROSTAT = "general.sampler.mirostat" + SAMPLER_MIROSTAT_TAU = "general.sampler.mirostat_tau" + SAMPLER_MIROSTAT_ETA = "general.sampler.mirostat_eta" # Authorship Metadata NAME = "general.name" From 44addcebd90673c33c10e66210f87a1ca7e3f2a4 Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Sun, 9 Nov 2025 22:21:18 +0800 Subject: [PATCH 05/11] gguf-py: introduce support for reading from generation_config.json Signed-off-by: Aaron Teo --- gguf-py/gguf/metadata.py | 48 +++++++++++++++ gguf-py/tests/test_metadata.py | 103 +++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index b9c9697c8086c..72ca4b93b78df 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -65,11 +65,42 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat model_card =
Metadata.load_model_card(model_path) hf_params = Metadata.load_hf_parameters(model_path) + gen_config = Metadata.load_generation_config(model_path) # TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter # heuristics metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params) + if gen_config: + # Standard generation_config.json parameters + if metadata.sampler_top_k is None and "top_k" in gen_config: + metadata.sampler_top_k = int(gen_config["top_k"]) + + if metadata.sampler_top_p is None and "top_p" in gen_config: + metadata.sampler_top_p = float(gen_config["top_p"]) + + if metadata.sampler_min_p is None and "min_p" in gen_config: + metadata.sampler_min_p = float(gen_config["min_p"]) + + if metadata.sampler_temp is None and "temperature" in gen_config: + metadata.sampler_temp = float(gen_config["temperature"]) + + # Non-standard generation_config.json parameters + if metadata.sampler_penalty_last_n is None and "penalty_last_n" in gen_config: + metadata.sampler_penalty_last_n = int(gen_config["penalty_last_n"]) + + if metadata.sampler_penalty_repeat is None and "penalty_repeat" in gen_config: + metadata.sampler_penalty_repeat = float(gen_config["penalty_repeat"]) + + if metadata.sampler_mirostat is None and "mirostat" in gen_config: + metadata.sampler_mirostat = int(gen_config["mirostat"]) + + if metadata.sampler_mirostat_tau is None and "mirostat_tau" in gen_config: + metadata.sampler_mirostat_tau = float(gen_config["mirostat_tau"]) + + if metadata.sampler_mirostat_eta is None and "mirostat_eta" in gen_config: + metadata.sampler_mirostat_eta = float(gen_config["mirostat_eta"]) + # Metadata Override File Provided # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) @@ -193,6 +224,23 @@ def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]: with open(config_path, "r", 
encoding="utf-8") as f: return json.load(f) + @staticmethod + def load_generation_config(model_path: Optional[Path] = None) -> dict[str, Any]: + if model_path is None or not model_path.is_dir(): + return {} + + generation_config_path = model_path / "generation_config.json" + + if not generation_config_path.is_file(): + return {} + + try: + with open(generation_config_path, "r", encoding="utf-8") as f: + return json.load(f) + except (json.JSONDecodeError, IOError) as e: + # not all models have valid generation_config.json + return {} + @staticmethod def id_to_title(string): # Convert capitalization into title form unless acronym or version number diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py index 40d484f4eaa9d..93e14807cbb23 100755 --- a/gguf-py/tests/test_metadata.py +++ b/gguf-py/tests/test_metadata.py @@ -233,6 +233,109 @@ def test_apply_metadata_heuristic_from_model_dir(self): expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', finetune='DPO', basename='hermes-2-pro-llama-3', size_label='8B') self.assertEqual(got, expect) + def test_load_generation_config(self): + import tempfile + import json + + # Test with a valid generation_config.json + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir_path = Path(tmpdir) + gen_config_path = tmpdir_path / "generation_config.json" + + # Create a sample generation_config.json + gen_config_data = { + "temperature": 0.7, + "top_k": 50, + "top_p": 0.95, + "repetition_penalty": 1.1, + "do_sample": True, + "max_length": 2048 + } + + with open(gen_config_path, "w") as f: + json.dump(gen_config_data, f) + + # Test loading the file + result = gguf.Metadata.load_generation_config(tmpdir_path) + self.assertEqual(result, gen_config_data) + + # Test with missing file + with tempfile.TemporaryDirectory() as tmpdir: + result = gguf.Metadata.load_generation_config(Path(tmpdir)) + self.assertEqual(result, {}) + + # Test with None path + result = gguf.Metadata.load_generation_config(None) + 
self.assertEqual(result, {}) + + def test_metadata_load_with_generation_config(self): + import tempfile + import json + + # Test that generation_config values are properly loaded into metadata + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir_path = Path(tmpdir) + gen_config_path = tmpdir_path / "generation_config.json" + + # Create a sample generation_config.json with sampling parameters + gen_config_data = { + "temperature": 0.8, + "top_k": 40, + "top_p": 0.9, + "min_p": 0.05, + "repetition_penalty": 1.15, + } + + with open(gen_config_path, "w") as f: + json.dump(gen_config_data, f) + + # Load metadata with generation config + metadata = gguf.Metadata.load(model_path=tmpdir_path) + + # Verify sampling parameters were loaded + self.assertEqual(metadata.sampler_temp, 0.8) + self.assertEqual(metadata.sampler_top_k, 40) + self.assertEqual(metadata.sampler_top_p, 0.9) + self.assertEqual(metadata.sampler_min_p, 0.05) + self.assertEqual(metadata.sampler_penalty_repeat, 1.15) + + def test_metadata_override_precedence(self): + import tempfile + import json + + # Test that metadata_override takes precedence over generation_config + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir_path = Path(tmpdir) + gen_config_path = tmpdir_path / "generation_config.json" + metadata_override_path = tmpdir_path / "metadata.json" + + # Create generation_config.json + gen_config_data = { + "temperature": 0.7, + "top_k": 50, + } + with open(gen_config_path, "w") as f: + json.dump(gen_config_data, f) + + # Create metadata.json that overrides temperature + metadata_override_data = { + "general.sampler.temp": 0.5, + } + with open(metadata_override_path, "w") as f: + json.dump(metadata_override_data, f) + + # Load metadata with both files present + metadata = gguf.Metadata.load( + metadata_override_path=metadata_override_path, + model_path=tmpdir_path + ) + + # Verify that metadata_override takes precedence for temperature + self.assertEqual(metadata.sampler_temp, 0.5) + # Verify 
that generation_config value is used for top_k + self.assertEqual(metadata.sampler_top_k, 50) + if __name__ == "__main__": unittest.main() + From 0f8d637cbd3d8834905a97e4bc30d14200bfbe08 Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Sun, 9 Nov 2025 22:33:18 +0800 Subject: [PATCH 06/11] gguf-py: simplified gen_config loading Signed-off-by: Aaron Teo --- gguf-py/gguf/metadata.py | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 72ca4b93b78df..ca6a710065960 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -72,34 +72,15 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params) if gen_config: - # Standard generation_config.json parameters - if metadata.sampler_top_k is None and "top_k" in gen_config: - metadata.sampler_top_k = int(gen_config["top_k"]) - - if metadata.sampler_top_p is None and "top_p" in gen_config: - metadata.sampler_top_p = float(gen_config["top_p"]) - - if metadata.sampler_min_p is None and "min_p" in gen_config: - metadata.sampler_min_p = float(gen_config["min_p"]) - - if metadata.sampler_temp is None and "temperature" in gen_config: - metadata.sampler_temp = float(gen_config["temperature"]) - - # Non-standard generation_config.json parameters - if metadata.sampler_penalty_last_n is None and "penalty_last_n" in gen_config: - metadata.sampler_penalty_last_n = int(gen_config["penalty_last_n"]) - - if metadata.sampler_penalty_repeat is None and "penalty_repeat" in gen_config: - metadata.sampler_penalty_repeat = float(gen_config["penalty_repeat"]) - - if metadata.sampler_mirostat is None and "mirostat" in gen_config: - metadata.sampler_mirostat = int(gen_config["mirostat"]) - - if metadata.sampler_mirostat_tau is None and "mirostat_tau" in gen_config: - metadata.sampler_mirostat_tau = 
float(gen_config["mirostat_tau"]) - - if metadata.sampler_mirostat_eta is None and "mirostat_eta" in gen_config: - metadata.sampler_mirostat_eta = float(gen_config["mirostat_eta"]) + metadata.sampler_top_k = gen_config.get("top_k", metadata.sampler_top_k) + metadata.sampler_top_p = gen_config.get("top_p", metadata.sampler_top_p) + metadata.sampler_min_p = gen_config.get("min_p", metadata.sampler_min_p) + metadata.sampler_temp = gen_config.get("temperature", metadata.sampler_temp) + metadata.sampler_penalty_last_n = gen_config.get("penalty_last_n", metadata.sampler_penalty_last_n) + metadata.sampler_penalty_repeat = gen_config.get("penalty_repeat", metadata.sampler_penalty_repeat) + metadata.sampler_mirostat = gen_config.get("mirostat", metadata.sampler_mirostat) + metadata.sampler_mirostat_tau = gen_config.get("mirostat_tau", metadata.sampler_mirostat_tau) + metadata.sampler_mirostat_eta = gen_config.get("mirostat_eta", metadata.sampler_mirostat_eta) # Metadata Override File Provided # This is based on LLM_KV_NAMES mapping in llama.cpp From 6cf39000a945415385186d789b837ebda122d12c Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Sun, 9 Nov 2025 22:42:04 +0800 Subject: [PATCH 07/11] llama: add support for xtc sampler Signed-off-by: Aaron Teo --- common/arg.cpp | 2 ++ common/common.cpp | 20 ++++++++-------- common/common.h | 20 ++++++++-------- gguf-py/gguf/constants.py | 2 ++ gguf-py/gguf/gguf_writer.py | 6 +++++ gguf-py/gguf/metadata.py | 46 ++++++++++++++++++++++--------------- src/llama-arch.cpp | 18 ++++++++------- src/llama-arch.h | 2 ++ 8 files changed, 72 insertions(+), 44 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 97d896ea314c0..9ce9df59741b3 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1300,6 +1300,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability), [](common_params & params, const 
std::string & value) { params.sampling.xtc_probability = std::stof(value); + params.sampling.sampling_mask |= common_params_sampling::SAMPLING_MASK_BITS_XTC_PROBABILITY; } ).set_sparam()); add_opt(common_arg( @@ -1307,6 +1308,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex string_format("xtc threshold (default: %.1f, 1.0 = disabled)", (double)params.sampling.xtc_threshold), [](common_params & params, const std::string & value) { params.sampling.xtc_threshold = std::stof(value); + params.sampling.sampling_mask |= common_params_sampling::SAMPLING_MASK_BITS_XTC_THRESHOLD; } ).set_sparam()); add_opt(common_arg( diff --git a/common/common.cpp b/common/common.cpp index 51071fb1fc967..0f3a1a6c5461e 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -974,15 +974,17 @@ static inline void common_init_sampler_from_model( } }; - get_int32("general.sampler.top_k", sparams.top_k, common_params_sampling::SAMPLING_MASK_BITS_TOP_K); - get_float("general.sampler.top_p", sparams.top_p, common_params_sampling::SAMPLING_MASK_BITS_TOP_P); - get_float("general.sampler.min_p", sparams.min_p, common_params_sampling::SAMPLING_MASK_BITS_MIN_P); - get_float("general.sampler.temp", sparams.temp, common_params_sampling::SAMPLING_MASK_BITS_TEMP); - get_int32("general.sampler.penalty_last_n", sparams.penalty_last_n, common_params_sampling::SAMPLING_MASK_BITS_PENALTY_LAST_N); - get_float("general.sampler.penalty_repeat", sparams.penalty_repeat, common_params_sampling::SAMPLING_MASK_BITS_PENALTY_REPEAT); - get_int32("general.sampler.mirostat", sparams.mirostat, common_params_sampling::SAMPLING_MASK_BITS_MIROSTAT); - get_float("general.sampler.mirostat_tau", sparams.mirostat_tau, common_params_sampling::SAMPLING_MASK_BITS_MIROSTAT_TAU); - get_float("general.sampler.mirostat_eta", sparams.mirostat_eta, common_params_sampling::SAMPLING_MASK_BITS_MIROSTAT_ETA); + get_int32("general.sampler.top_k", sparams.top_k, 
common_params_sampling::SAMPLING_MASK_BITS_TOP_K); + get_float("general.sampler.top_p", sparams.top_p, common_params_sampling::SAMPLING_MASK_BITS_TOP_P); + get_float("general.sampler.min_p", sparams.min_p, common_params_sampling::SAMPLING_MASK_BITS_MIN_P); + get_float("general.sampler.xtc_probability", sparams.xtc_probability, common_params_sampling::SAMPLING_MASK_BITS_XTC_PROBABILITY); + get_float("general.sampler.xtc_threshold", sparams.xtc_threshold, common_params_sampling::SAMPLING_MASK_BITS_XTC_THRESHOLD); + get_float("general.sampler.temp", sparams.temp, common_params_sampling::SAMPLING_MASK_BITS_TEMP); + get_int32("general.sampler.penalty_last_n", sparams.penalty_last_n, common_params_sampling::SAMPLING_MASK_BITS_PENALTY_LAST_N); + get_float("general.sampler.penalty_repeat", sparams.penalty_repeat, common_params_sampling::SAMPLING_MASK_BITS_PENALTY_REPEAT); + get_int32("general.sampler.mirostat", sparams.mirostat, common_params_sampling::SAMPLING_MASK_BITS_MIROSTAT); + get_float("general.sampler.mirostat_tau", sparams.mirostat_tau, common_params_sampling::SAMPLING_MASK_BITS_MIROSTAT_TAU); + get_float("general.sampler.mirostat_eta", sparams.mirostat_eta, common_params_sampling::SAMPLING_MASK_BITS_MIROSTAT_ETA); } struct common_init_result common_init_from_params(common_params & params) { diff --git a/common/common.h b/common/common.h index c0d5d86127b81..5081361394ddd 100644 --- a/common/common.h +++ b/common/common.h @@ -167,15 +167,17 @@ struct common_params_sampling { uint16_t sampling_mask = 0; // bitfield to track user-specified samplers enum sampling_mask_bits : uint16_t { - SAMPLING_MASK_BITS_TOP_K = 1 << 0, - SAMPLING_MASK_BITS_TOP_P = 1 << 1, - SAMPLING_MASK_BITS_MIN_P = 1 << 2, - SAMPLING_MASK_BITS_TEMP = 1 << 3, - SAMPLING_MASK_BITS_PENALTY_LAST_N = 1 << 4, - SAMPLING_MASK_BITS_PENALTY_REPEAT = 1 << 5, - SAMPLING_MASK_BITS_MIROSTAT = 1 << 6, - SAMPLING_MASK_BITS_MIROSTAT_TAU = 1 << 7, - SAMPLING_MASK_BITS_MIROSTAT_ETA = 1 << 8, + 
SAMPLING_MASK_BITS_TOP_K = 1 << 0, + SAMPLING_MASK_BITS_TOP_P = 1 << 1, + SAMPLING_MASK_BITS_MIN_P = 1 << 2, + SAMPLING_MASK_BITS_XTC_PROBABILITY = 1 << 3, + SAMPLING_MASK_BITS_XTC_THRESHOLD = 1 << 4, + SAMPLING_MASK_BITS_TEMP = 1 << 5, + SAMPLING_MASK_BITS_PENALTY_LAST_N = 1 << 6, + SAMPLING_MASK_BITS_PENALTY_REPEAT = 1 << 7, + SAMPLING_MASK_BITS_MIROSTAT = 1 << 8, + SAMPLING_MASK_BITS_MIROSTAT_TAU = 1 << 9, + SAMPLING_MASK_BITS_MIROSTAT_ETA = 1 << 10, }; std::vector dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 4e8e3a15763fe..9af3c20b9124e 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -29,6 +29,8 @@ class General: SAMPLER_TOP_K = "general.sampler.top_k" SAMPLER_TOP_P = "general.sampler.top_p" SAMPLER_MIN_P = "general.sampler.min_p" + SAMPLER_XTC_PROBABILITY = "general.sampler.xtc_probability" + SAMPLER_XTC_THRESHOLD = "general.sampler.xtc_threshold" SAMPLER_TEMP = "general.sampler.temp" SAMPLER_PENALTY_LAST_N = "general.sampler.penalty_last_n" SAMPLER_PENALTY_REPEAT = "general.sampler.penalty_repeat" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 23567281d1503..22ba27ed6130c 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -505,6 +505,12 @@ def add_sampler_top_p(self, top_p: float) -> None: def add_sampler_min_p(self, min_p: float) -> None: self.add_float32(Keys.General.SAMPLER_MIN_P, min_p) + def add_sampler_xtc_probability(self, xtc_probability: float) -> None: + self.add_float32(Keys.General.SAMPLER_XTC_PROBABILITY, xtc_probability) + + def add_sampler_xtc_threshold(self, xtc_threshold: float) -> None: + self.add_float32(Keys.General.SAMPLER_XTC_THRESHOLD, xtc_threshold) + def add_sampler_temp(self, temp: float) -> None: self.add_float32(Keys.General.SAMPLER_TEMP, temp) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 
ca6a710065960..f7ed17bababa7 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -21,6 +21,8 @@ class Metadata: sampler_top_k: Optional[int] = None sampler_top_p: Optional[float] = None sampler_min_p: Optional[float] = None + sampler_xtc_probability: Optional[float] = None + sampler_xtc_threshold: Optional[float] = None sampler_temp: Optional[float] = None sampler_penalty_last_n: Optional[int] = None sampler_penalty_repeat: Optional[float] = None @@ -72,29 +74,33 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params) if gen_config: - metadata.sampler_top_k = gen_config.get("top_k", metadata.sampler_top_k) - metadata.sampler_top_p = gen_config.get("top_p", metadata.sampler_top_p) - metadata.sampler_min_p = gen_config.get("min_p", metadata.sampler_min_p) - metadata.sampler_temp = gen_config.get("temperature", metadata.sampler_temp) - metadata.sampler_penalty_last_n = gen_config.get("penalty_last_n", metadata.sampler_penalty_last_n) - metadata.sampler_penalty_repeat = gen_config.get("penalty_repeat", metadata.sampler_penalty_repeat) - metadata.sampler_mirostat = gen_config.get("mirostat", metadata.sampler_mirostat) - metadata.sampler_mirostat_tau = gen_config.get("mirostat_tau", metadata.sampler_mirostat_tau) - metadata.sampler_mirostat_eta = gen_config.get("mirostat_eta", metadata.sampler_mirostat_eta) + metadata.sampler_top_k = gen_config.get("top_k", metadata.sampler_top_k) + metadata.sampler_top_p = gen_config.get("top_p", metadata.sampler_top_p) + metadata.sampler_min_p = gen_config.get("min_p", metadata.sampler_min_p) + metadata.sampler_xtc_probability = gen_config.get("xtc_probability", metadata.sampler_xtc_probability) + metadata.sampler_xtc_threshold = gen_config.get("xtc_threshold", metadata.sampler_xtc_threshold) + metadata.sampler_temp = gen_config.get("temperature", metadata.sampler_temp) + 
metadata.sampler_penalty_last_n = gen_config.get("penalty_last_n", metadata.sampler_penalty_last_n) + metadata.sampler_penalty_repeat = gen_config.get("penalty_repeat", metadata.sampler_penalty_repeat) + metadata.sampler_mirostat = gen_config.get("mirostat", metadata.sampler_mirostat) + metadata.sampler_mirostat_tau = gen_config.get("mirostat_tau", metadata.sampler_mirostat_tau) + metadata.sampler_mirostat_eta = gen_config.get("mirostat_eta", metadata.sampler_mirostat_eta) # Metadata Override File Provided # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) - metadata.sampler_top_k = metadata_override.get(Keys.General.SAMPLER_TOP_K, metadata.sampler_top_k) - metadata.sampler_top_p = metadata_override.get(Keys.General.SAMPLER_TOP_P, metadata.sampler_top_p) - metadata.sampler_min_p = metadata_override.get(Keys.General.SAMPLER_MIN_P, metadata.sampler_min_p) - metadata.sampler_temp = metadata_override.get(Keys.General.SAMPLER_TEMP, metadata.sampler_temp) - metadata.sampler_penalty_last_n = metadata_override.get(Keys.General.SAMPLER_PENALTY_LAST_N, metadata.sampler_penalty_last_n) - metadata.sampler_penalty_repeat = metadata_override.get(Keys.General.SAMPLER_PENALTY_REPEAT, metadata.sampler_penalty_repeat) - metadata.sampler_mirostat = metadata_override.get(Keys.General.SAMPLER_MIROSTAT, metadata.sampler_mirostat) - metadata.sampler_mirostat_tau = metadata_override.get(Keys.General.SAMPLER_MIROSTAT_TAU, metadata.sampler_mirostat_tau) - metadata.sampler_mirostat_eta = metadata_override.get(Keys.General.SAMPLER_MIROSTAT_ETA, metadata.sampler_mirostat_eta) + metadata.sampler_top_k = metadata_override.get(Keys.General.SAMPLER_TOP_K, metadata.sampler_top_k) + metadata.sampler_top_p = metadata_override.get(Keys.General.SAMPLER_TOP_P, metadata.sampler_top_p) + metadata.sampler_min_p = metadata_override.get(Keys.General.SAMPLER_MIN_P, metadata.sampler_min_p) + metadata.sampler_xtc_probability = 
metadata_override.get(Keys.General.SAMPLER_XTC_PROBABILITY, metadata.sampler_xtc_probability) + metadata.sampler_xtc_threshold = metadata_override.get(Keys.General.SAMPLER_XTC_THRESHOLD, metadata.sampler_xtc_threshold) + metadata.sampler_temp = metadata_override.get(Keys.General.SAMPLER_TEMP, metadata.sampler_temp) + metadata.sampler_penalty_last_n = metadata_override.get(Keys.General.SAMPLER_PENALTY_LAST_N, metadata.sampler_penalty_last_n) + metadata.sampler_penalty_repeat = metadata_override.get(Keys.General.SAMPLER_PENALTY_REPEAT, metadata.sampler_penalty_repeat) + metadata.sampler_mirostat = metadata_override.get(Keys.General.SAMPLER_MIROSTAT, metadata.sampler_mirostat) + metadata.sampler_mirostat_tau = metadata_override.get(Keys.General.SAMPLER_MIROSTAT_TAU, metadata.sampler_mirostat_tau) + metadata.sampler_mirostat_eta = metadata_override.get(Keys.General.SAMPLER_MIROSTAT_ETA, metadata.sampler_mirostat_eta) metadata.name = metadata_override.get(Keys.General.NAME, metadata.name) metadata.author = metadata_override.get(Keys.General.AUTHOR, metadata.author) @@ -603,6 +609,10 @@ def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter): gguf_writer.add_sampler_top_p(self.sampler_top_p) if self.sampler_min_p is not None: gguf_writer.add_sampler_min_p(self.sampler_min_p) + if self.sampler_xtc_probability is not None: + gguf_writer.add_sampler_xtc_probability(self.sampler_xtc_probability) + if self.sampler_xtc_threshold is not None: + gguf_writer.add_sampler_xtc_threshold(self.sampler_xtc_threshold) if self.sampler_temp is not None: gguf_writer.add_sampler_temp(self.sampler_temp) if self.sampler_penalty_last_n is not None: diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp index 2c30068712f74..dd5b7ae808c91 100644 --- a/src/llama-arch.cpp +++ b/src/llama-arch.cpp @@ -112,14 +112,16 @@ static const std::map LLM_ARCH_NAMES = { }; static const std::map LLM_KV_NAMES = { - { LLM_KV_GENERAL_TYPE, "general.type" }, - { LLM_KV_GENERAL_ARCHITECTURE, 
"general.architecture" }, - { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" }, - { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" }, - { LLM_KV_GENERAL_FILE_TYPE, "general.file_type" }, - { LLM_KV_GENERAL_SAMPLER_TOP_K, "general.sampler.top_k" }, - { LLM_KV_GENERAL_SAMPLER_TOP_P, "general.sampler.top_p" }, - { LLM_KV_GENERAL_SAMPLER_MIN_P, "general.sampler.min_p" }, + { LLM_KV_GENERAL_TYPE, "general.type" }, + { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" }, + { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" }, + { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" }, + { LLM_KV_GENERAL_FILE_TYPE, "general.file_type" }, + { LLM_KV_GENERAL_SAMPLER_TOP_K, "general.sampler.top_k" }, + { LLM_KV_GENERAL_SAMPLER_TOP_P, "general.sampler.top_p" }, + { LLM_KV_GENERAL_SAMPLER_MIN_P, "general.sampler.min_p" }, + { LLM_KV_GENERAL_SAMPLER_XTC_PROBABILITY, "general.sampler.xtc_probability" }, + { LLM_KV_GENERAL_SAMPLER_XTC_THRESHOLD, "general.sampler.xtc_threshold" }, { LLM_KV_GENERAL_SAMPLER_TEMP, "general.sampler.temperature" }, { LLM_KV_GENERAL_SAMPLER_PENALTY_LAST_N, "general.sampler.penalty_last_n" }, { LLM_KV_GENERAL_SAMPLER_PENALTY_REPEAT, "general.sampler.penalty_repeat" }, diff --git a/src/llama-arch.h b/src/llama-arch.h index 8ae8eed093e86..6ed2731e1d4c6 100644 --- a/src/llama-arch.h +++ b/src/llama-arch.h @@ -124,6 +124,8 @@ enum llm_kv { LLM_KV_GENERAL_SAMPLER_TOP_K, LLM_KV_GENERAL_SAMPLER_TOP_P, LLM_KV_GENERAL_SAMPLER_MIN_P, + LLM_KV_GENERAL_SAMPLER_XTC_PROBABILITY, + LLM_KV_GENERAL_SAMPLER_XTC_THRESHOLD, LLM_KV_GENERAL_SAMPLER_TEMP, LLM_KV_GENERAL_SAMPLER_PENALTY_LAST_N, LLM_KV_GENERAL_SAMPLER_PENALTY_REPEAT, From c8845ff5de4f00aba9fbd87f6f6229a67d6f9703 Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Sun, 9 Nov 2025 22:42:43 +0800 Subject: [PATCH 08/11] chore: formatting Signed-off-by: Aaron Teo --- src/llama-arch.cpp | 48 +++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) 
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp index dd5b7ae808c91..a48d42442edad 100644 --- a/src/llama-arch.cpp +++ b/src/llama-arch.cpp @@ -112,30 +112,30 @@ static const std::map LLM_ARCH_NAMES = { }; static const std::map LLM_KV_NAMES = { - { LLM_KV_GENERAL_TYPE, "general.type" }, - { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" }, - { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" }, - { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" }, - { LLM_KV_GENERAL_FILE_TYPE, "general.file_type" }, - { LLM_KV_GENERAL_SAMPLER_TOP_K, "general.sampler.top_k" }, - { LLM_KV_GENERAL_SAMPLER_TOP_P, "general.sampler.top_p" }, - { LLM_KV_GENERAL_SAMPLER_MIN_P, "general.sampler.min_p" }, - { LLM_KV_GENERAL_SAMPLER_XTC_PROBABILITY, "general.sampler.xtc_probability" }, - { LLM_KV_GENERAL_SAMPLER_XTC_THRESHOLD, "general.sampler.xtc_threshold" }, - { LLM_KV_GENERAL_SAMPLER_TEMP, "general.sampler.temperature" }, - { LLM_KV_GENERAL_SAMPLER_PENALTY_LAST_N, "general.sampler.penalty_last_n" }, - { LLM_KV_GENERAL_SAMPLER_PENALTY_REPEAT, "general.sampler.penalty_repeat" }, - { LLM_KV_GENERAL_SAMPLER_MIROSTAT, "general.sampler.mirostat" }, - { LLM_KV_GENERAL_SAMPLER_MIROSTAT_TAU, "general.sampler.mirostat_tau" }, - { LLM_KV_GENERAL_SAMPLER_MIROSTAT_ETA, "general.sampler.mirostat_eta" }, - { LLM_KV_GENERAL_NAME, "general.name" }, - { LLM_KV_GENERAL_AUTHOR, "general.author" }, - { LLM_KV_GENERAL_VERSION, "general.version" }, - { LLM_KV_GENERAL_URL, "general.url" }, - { LLM_KV_GENERAL_DESCRIPTION, "general.description" }, - { LLM_KV_GENERAL_LICENSE, "general.license" }, - { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" }, - { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" }, + { LLM_KV_GENERAL_TYPE, "general.type" }, + { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" }, + { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" }, + { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" }, + { LLM_KV_GENERAL_FILE_TYPE, 
"general.file_type" }, + { LLM_KV_GENERAL_SAMPLER_TOP_K, "general.sampler.top_k" }, + { LLM_KV_GENERAL_SAMPLER_TOP_P, "general.sampler.top_p" }, + { LLM_KV_GENERAL_SAMPLER_MIN_P, "general.sampler.min_p" }, + { LLM_KV_GENERAL_SAMPLER_XTC_PROBABILITY, "general.sampler.xtc_probability" }, + { LLM_KV_GENERAL_SAMPLER_XTC_THRESHOLD, "general.sampler.xtc_threshold" }, + { LLM_KV_GENERAL_SAMPLER_TEMP, "general.sampler.temperature" }, + { LLM_KV_GENERAL_SAMPLER_PENALTY_LAST_N, "general.sampler.penalty_last_n" }, + { LLM_KV_GENERAL_SAMPLER_PENALTY_REPEAT, "general.sampler.penalty_repeat" }, + { LLM_KV_GENERAL_SAMPLER_MIROSTAT, "general.sampler.mirostat" }, + { LLM_KV_GENERAL_SAMPLER_MIROSTAT_TAU, "general.sampler.mirostat_tau" }, + { LLM_KV_GENERAL_SAMPLER_MIROSTAT_ETA, "general.sampler.mirostat_eta" }, + { LLM_KV_GENERAL_NAME, "general.name" }, + { LLM_KV_GENERAL_AUTHOR, "general.author" }, + { LLM_KV_GENERAL_VERSION, "general.version" }, + { LLM_KV_GENERAL_URL, "general.url" }, + { LLM_KV_GENERAL_DESCRIPTION, "general.description" }, + { LLM_KV_GENERAL_LICENSE, "general.license" }, + { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" }, + { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" }, { LLM_KV_VOCAB_SIZE, "%s.vocab_size" }, { LLM_KV_CONTEXT_LENGTH, "%s.context_length" }, From 33ddb2742c388b8d53a9a85080b8ba405125dcca Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Sun, 9 Nov 2025 23:20:40 +0800 Subject: [PATCH 09/11] common: introduce support for general.sampler.sequence Signed-off-by: Aaron Teo --- common/arg.cpp | 1 + common/common.cpp | 12 ++++++++++++ common/common.h | 23 ++++++++++++----------- gguf-py/gguf/constants.py | 1 + gguf-py/gguf/gguf_writer.py | 3 +++ gguf-py/gguf/metadata.py | 25 +++++++++++++++---------- src/llama-arch.cpp | 1 + src/llama-arch.h | 1 + 8 files changed, 46 insertions(+), 21 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 9ce9df59741b3..c7a85a33c0129 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ 
-1232,6 +1232,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params & params, const std::string & value) { const auto sampler_names = string_split(value, ';'); params.sampling.samplers = common_sampler_types_from_names(sampler_names, true); + params.sampling.sampling_mask |= common_params_sampling::SAMPLING_MASK_BITS_SAMPLERS; } ).set_sparam()); add_opt(common_arg( diff --git a/common/common.cpp b/common/common.cpp index 0f3a1a6c5461e..0250024a4e497 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -8,6 +8,7 @@ #include "common.h" #include "log.h" #include "llama.h" +#include "sampling.h" #include #include @@ -974,6 +975,17 @@ static inline void common_init_sampler_from_model( } }; + // Sampler sequence + if (!(mask & common_params_sampling::SAMPLING_MASK_BITS_SAMPLERS)) { + char buf[512] = {0}; + if (llama_model_meta_val_str(model, "general.sampler.sequence", buf, sizeof(buf)) > 0) { + const std::vector sampler_names = string_split(std::string(buf), ';'); + if (!sampler_names.empty()) { + sparams.samplers = common_sampler_types_from_names(sampler_names, true); + } + } + } + get_int32("general.sampler.top_k", sparams.top_k, common_params_sampling::SAMPLING_MASK_BITS_TOP_K); get_float("general.sampler.top_p", sparams.top_p, common_params_sampling::SAMPLING_MASK_BITS_TOP_P); get_float("general.sampler.min_p", sparams.min_p, common_params_sampling::SAMPLING_MASK_BITS_MIN_P); diff --git a/common/common.h b/common/common.h index 5081361394ddd..19a708b2452c2 100644 --- a/common/common.h +++ b/common/common.h @@ -167,17 +167,18 @@ struct common_params_sampling { uint16_t sampling_mask = 0; // bitfield to track user-specified samplers enum sampling_mask_bits : uint16_t { - SAMPLING_MASK_BITS_TOP_K = 1 << 0, - SAMPLING_MASK_BITS_TOP_P = 1 << 1, - SAMPLING_MASK_BITS_MIN_P = 1 << 2, - SAMPLING_MASK_BITS_XTC_PROBABILITY = 1 << 3, - SAMPLING_MASK_BITS_XTC_THRESHOLD = 1 << 4, - SAMPLING_MASK_BITS_TEMP = 1 << 5, - 
SAMPLING_MASK_BITS_PENALTY_LAST_N = 1 << 6, - SAMPLING_MASK_BITS_PENALTY_REPEAT = 1 << 7, - SAMPLING_MASK_BITS_MIROSTAT = 1 << 8, - SAMPLING_MASK_BITS_MIROSTAT_TAU = 1 << 9, - SAMPLING_MASK_BITS_MIROSTAT_ETA = 1 << 10, + SAMPLING_MASK_BITS_SAMPLERS = 1 << 0, + SAMPLING_MASK_BITS_TOP_K = 1 << 1, + SAMPLING_MASK_BITS_TOP_P = 1 << 2, + SAMPLING_MASK_BITS_MIN_P = 1 << 3, + SAMPLING_MASK_BITS_XTC_PROBABILITY = 1 << 4, + SAMPLING_MASK_BITS_XTC_THRESHOLD = 1 << 5, + SAMPLING_MASK_BITS_TEMP = 1 << 6, + SAMPLING_MASK_BITS_PENALTY_LAST_N = 1 << 7, + SAMPLING_MASK_BITS_PENALTY_REPEAT = 1 << 8, + SAMPLING_MASK_BITS_MIROSTAT = 1 << 9, + SAMPLING_MASK_BITS_MIROSTAT_TAU = 1 << 10, + SAMPLING_MASK_BITS_MIROSTAT_ETA = 1 << 11, }; std::vector dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 9af3c20b9124e..be374c678cf30 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -26,6 +26,7 @@ class General: FILE_TYPE = "general.file_type" # Recommended Sampler Parameters + SAMPLER_SEQUENCE = "general.sampler.sequence" SAMPLER_TOP_K = "general.sampler.top_k" SAMPLER_TOP_P = "general.sampler.top_p" SAMPLER_MIN_P = "general.sampler.min_p" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 22ba27ed6130c..067c062ba76c3 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -496,6 +496,9 @@ def add_custom_alignment(self, alignment: int) -> None: def add_file_type(self, ftype: int) -> None: self.add_uint32(Keys.General.FILE_TYPE, ftype) + def add_sampler_sequence(self, sequence: str) -> None: + self.add_string(Keys.General.SAMPLER_SEQUENCE, sequence) + def add_sampler_top_k(self, top_k: int) -> None: self.add_int32(Keys.General.SAMPLER_TOP_K, top_k) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index f7ed17bababa7..b783d9a82bad8 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ 
-18,6 +18,7 @@ @dataclass class Metadata: # Recommended Sampler Parameters to be written to GGUF KV Store + sampler_sequence: Optional[str] = None sampler_top_k: Optional[int] = None sampler_top_p: Optional[float] = None sampler_min_p: Optional[float] = None @@ -74,22 +75,24 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params) if gen_config: - metadata.sampler_top_k = gen_config.get("top_k", metadata.sampler_top_k) - metadata.sampler_top_p = gen_config.get("top_p", metadata.sampler_top_p) - metadata.sampler_min_p = gen_config.get("min_p", metadata.sampler_min_p) + metadata.sampler_sequence = gen_config.get("sequence", metadata.sampler_sequence) + metadata.sampler_top_k = gen_config.get("top_k", metadata.sampler_top_k) + metadata.sampler_top_p = gen_config.get("top_p", metadata.sampler_top_p) + metadata.sampler_min_p = gen_config.get("min_p", metadata.sampler_min_p) metadata.sampler_xtc_probability = gen_config.get("xtc_probability", metadata.sampler_xtc_probability) - metadata.sampler_xtc_threshold = gen_config.get("xtc_threshold", metadata.sampler_xtc_threshold) - metadata.sampler_temp = gen_config.get("temperature", metadata.sampler_temp) - metadata.sampler_penalty_last_n = gen_config.get("penalty_last_n", metadata.sampler_penalty_last_n) - metadata.sampler_penalty_repeat = gen_config.get("penalty_repeat", metadata.sampler_penalty_repeat) - metadata.sampler_mirostat = gen_config.get("mirostat", metadata.sampler_mirostat) - metadata.sampler_mirostat_tau = gen_config.get("mirostat_tau", metadata.sampler_mirostat_tau) - metadata.sampler_mirostat_eta = gen_config.get("mirostat_eta", metadata.sampler_mirostat_eta) + metadata.sampler_xtc_threshold = gen_config.get("xtc_threshold", metadata.sampler_xtc_threshold) + metadata.sampler_temp = gen_config.get("temperature", metadata.sampler_temp) + metadata.sampler_penalty_last_n = 
gen_config.get("penalty_last_n", metadata.sampler_penalty_last_n) + metadata.sampler_penalty_repeat = gen_config.get("penalty_repeat", metadata.sampler_penalty_repeat) + metadata.sampler_mirostat = gen_config.get("mirostat", metadata.sampler_mirostat) + metadata.sampler_mirostat_tau = gen_config.get("mirostat_tau", metadata.sampler_mirostat_tau) + metadata.sampler_mirostat_eta = gen_config.get("mirostat_eta", metadata.sampler_mirostat_eta) # Metadata Override File Provided # This is based on LLM_KV_NAMES mapping in llama.cpp metadata_override = Metadata.load_metadata_override(metadata_override_path) + metadata.sampler_sequence = metadata_override.get(Keys.General.SAMPLER_SEQUENCE, metadata.sampler_sequence) metadata.sampler_top_k = metadata_override.get(Keys.General.SAMPLER_TOP_K, metadata.sampler_top_k) metadata.sampler_top_p = metadata_override.get(Keys.General.SAMPLER_TOP_P, metadata.sampler_top_p) metadata.sampler_min_p = metadata_override.get(Keys.General.SAMPLER_MIN_P, metadata.sampler_min_p) @@ -603,6 +606,8 @@ def use_array_model_card_metadata(metadata_key: str, model_card_key: str): def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter): assert self.name is not None + if self.sampler_sequence is not None: + gguf_writer.add_sampler_sequence(self.sampler_sequence) if self.sampler_top_k is not None: gguf_writer.add_sampler_top_k(self.sampler_top_k) if self.sampler_top_p is not None: diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp index a48d42442edad..423ddf7d41bde 100644 --- a/src/llama-arch.cpp +++ b/src/llama-arch.cpp @@ -117,6 +117,7 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" }, { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" }, { LLM_KV_GENERAL_FILE_TYPE, "general.file_type" }, + { LLM_KV_GENERAL_SAMPLER_SEQUENCE, "general.sampler.sequence" }, { LLM_KV_GENERAL_SAMPLER_TOP_K, "general.sampler.top_k" }, { LLM_KV_GENERAL_SAMPLER_TOP_P, "general.sampler.top_p" }, { 
LLM_KV_GENERAL_SAMPLER_MIN_P, "general.sampler.min_p" }, diff --git a/src/llama-arch.h b/src/llama-arch.h index 6ed2731e1d4c6..2f868263a9f17 100644 --- a/src/llama-arch.h +++ b/src/llama-arch.h @@ -121,6 +121,7 @@ enum llm_kv { LLM_KV_GENERAL_QUANTIZATION_VERSION, LLM_KV_GENERAL_ALIGNMENT, LLM_KV_GENERAL_FILE_TYPE, + LLM_KV_GENERAL_SAMPLER_SEQUENCE, LLM_KV_GENERAL_SAMPLER_TOP_K, LLM_KV_GENERAL_SAMPLER_TOP_P, LLM_KV_GENERAL_SAMPLER_MIN_P, From fd3fa3a4776215625279561f1e4cf739db0f5625 Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Sun, 9 Nov 2025 23:32:42 +0800 Subject: [PATCH 10/11] gguf-py: revert test_metadata.py Signed-off-by: Aaron Teo --- gguf-py/tests/test_metadata.py | 103 --------------------------------- 1 file changed, 103 deletions(-) diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py index 93e14807cbb23..40d484f4eaa9d 100755 --- a/gguf-py/tests/test_metadata.py +++ b/gguf-py/tests/test_metadata.py @@ -233,109 +233,6 @@ def test_apply_metadata_heuristic_from_model_dir(self): expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', finetune='DPO', basename='hermes-2-pro-llama-3', size_label='8B') self.assertEqual(got, expect) - def test_load_generation_config(self): - import tempfile - import json - - # Test with a valid generation_config.json - with tempfile.TemporaryDirectory() as tmpdir: - tmpdir_path = Path(tmpdir) - gen_config_path = tmpdir_path / "generation_config.json" - - # Create a sample generation_config.json - gen_config_data = { - "temperature": 0.7, - "top_k": 50, - "top_p": 0.95, - "repetition_penalty": 1.1, - "do_sample": True, - "max_length": 2048 - } - - with open(gen_config_path, "w") as f: - json.dump(gen_config_data, f) - - # Test loading the file - result = gguf.Metadata.load_generation_config(tmpdir_path) - self.assertEqual(result, gen_config_data) - - # Test with missing file - with tempfile.TemporaryDirectory() as tmpdir: - result = gguf.Metadata.load_generation_config(Path(tmpdir)) - 
self.assertEqual(result, {}) - - # Test with None path - result = gguf.Metadata.load_generation_config(None) - self.assertEqual(result, {}) - - def test_metadata_load_with_generation_config(self): - import tempfile - import json - - # Test that generation_config values are properly loaded into metadata - with tempfile.TemporaryDirectory() as tmpdir: - tmpdir_path = Path(tmpdir) - gen_config_path = tmpdir_path / "generation_config.json" - - # Create a sample generation_config.json with sampling parameters - gen_config_data = { - "temperature": 0.8, - "top_k": 40, - "top_p": 0.9, - "min_p": 0.05, - "repetition_penalty": 1.15, - } - - with open(gen_config_path, "w") as f: - json.dump(gen_config_data, f) - - # Load metadata with generation config - metadata = gguf.Metadata.load(model_path=tmpdir_path) - - # Verify sampling parameters were loaded - self.assertEqual(metadata.sampler_temp, 0.8) - self.assertEqual(metadata.sampler_top_k, 40) - self.assertEqual(metadata.sampler_top_p, 0.9) - self.assertEqual(metadata.sampler_min_p, 0.05) - self.assertEqual(metadata.sampler_penalty_repeat, 1.15) - - def test_metadata_override_precedence(self): - import tempfile - import json - - # Test that metadata_override takes precedence over generation_config - with tempfile.TemporaryDirectory() as tmpdir: - tmpdir_path = Path(tmpdir) - gen_config_path = tmpdir_path / "generation_config.json" - metadata_override_path = tmpdir_path / "metadata.json" - - # Create generation_config.json - gen_config_data = { - "temperature": 0.7, - "top_k": 50, - } - with open(gen_config_path, "w") as f: - json.dump(gen_config_data, f) - - # Create metadata.json that overrides temperature - metadata_override_data = { - "general.sampler.temp": 0.5, - } - with open(metadata_override_path, "w") as f: - json.dump(metadata_override_data, f) - - # Load metadata with both files present - metadata = gguf.Metadata.load( - metadata_override_path=metadata_override_path, - model_path=tmpdir_path - ) - - # Verify that 
metadata_override takes precedence for temperature - self.assertEqual(metadata.sampler_temp, 0.5) - # Verify that generation_config value is used for top_k - self.assertEqual(metadata.sampler_top_k, 50) - if __name__ == "__main__": unittest.main() - From fc91c1025f66e4cd4f6c8f795e3af6e5807a0e96 Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Mon, 10 Nov 2025 01:18:05 +0800 Subject: [PATCH 11/11] gguf-py: fix linting Signed-off-by: Aaron Teo --- gguf-py/gguf/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index b783d9a82bad8..2244c072fde3b 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -227,7 +227,7 @@ def load_generation_config(model_path: Optional[Path] = None) -> dict[str, Any]: try: with open(generation_config_path, "r", encoding="utf-8") as f: return json.load(f) - except (json.JSONDecodeError, IOError) as e: + except (json.JSONDecodeError, IOError): # not all models have valid generation_config.json return {}