From d101cfad997647f4a7be42146481d61fed385f53 Mon Sep 17 00:00:00 2001
From: ju6ge
Date: Mon, 3 Nov 2025 10:15:52 +0100
Subject: [PATCH 1/3] use environment variable to disable control token
 decoding

llama-cpp-rs original usage required omitting control tokens from the
consumer of the library. This should not be the default though so now
this behavior can selectively be enabled through an environment variable
---
 llama-cpp-2/src/model.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs
index d8625b8d..4af3b4b2 100644
--- a/llama-cpp-2/src/model.rs
+++ b/llama-cpp-2/src/model.rs
@@ -395,8 +395,11 @@ impl LlamaModel {
         if attrs.is_empty()
             || attrs
                 .intersects(LlamaTokenAttr::Unknown | LlamaTokenAttr::Byte | LlamaTokenAttr::Unused)
+            // the following exclusion of control characters stems from a requirement of the original purpose of this project see
+            // https://github.com/utilityai/llama-cpp-rs/issues/826#issuecomment-3478624072. But it should not be the default behavior
+            // so this feature is now gated through the `LLAMA_RS_FORBID_CTRL_TOKEN_DECODE` environment variable
             || attrs.contains(LlamaTokenAttr::Control)
-                && (token == self.token_bos() || token == self.token_eos())
+                && (token == self.token_bos() || token == self.token_eos()) && std::env::var("LLAMA_RS_FORBID_CTRL_TOKEN_DECODE").is_ok_and(|v| v.parse::<bool>().is_ok_and(|v| v))
         {
             return Ok(Vec::new());
         }

From 9459a284804b819719004e9b2a7009e2a324953b Mon Sep 17 00:00:00 2001
From: ju6ge
Date: Tue, 4 Nov 2025 12:14:33 +0100
Subject: [PATCH 2/3] instead of an env variable use `special` parameter

given that the `special` function argument is used to toggle whether the
cpp bindings to llama.cpp render special tokens to the output the flag
can also be reused to feature gate the exclusion of `token_bos` and
`token_eos` from the output.
---
 llama-cpp-2/src/model.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs
index 4af3b4b2..7ba4531a 100644
--- a/llama-cpp-2/src/model.rs
+++ b/llama-cpp-2/src/model.rs
@@ -396,10 +396,11 @@ impl LlamaModel {
             || attrs
                 .intersects(LlamaTokenAttr::Unknown | LlamaTokenAttr::Byte | LlamaTokenAttr::Unused)
             // the following exclusion of control characters stems from a requirement of the original purpose of this project see
-            // https://github.com/utilityai/llama-cpp-rs/issues/826#issuecomment-3478624072. But it should not be the default behavior
-            // so this feature is now gated through the `LLAMA_RS_FORBID_CTRL_TOKEN_DECODE` environment variable
+            // https://github.com/utilityai/llama-cpp-rs/issues/826#issuecomment-3478624072. But it should not be the default behavior.
+            // given that llama.cpp [documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_cpp.llama_token_to_piece)
+            // states that `special` controls whether special tokens are rendered we can use it as a gate to this feature as well.
             || attrs.contains(LlamaTokenAttr::Control)
-                && (token == self.token_bos() || token == self.token_eos()) && std::env::var("LLAMA_RS_FORBID_CTRL_TOKEN_DECODE").is_ok_and(|v| v.parse::<bool>().is_ok_and(|v| v))
+                && (token == self.token_bos() || token == self.token_eos()) && special == Special::Plaintext
         {
             return Ok(Vec::new());
         }

From 6ac0079430edc0fa1c3fcb8feee34f4a503bc873 Mon Sep 17 00:00:00 2001
From: ju6ge
Date: Tue, 4 Nov 2025 12:47:35 +0100
Subject: [PATCH 3/3] introduce new variant to `Special` enum to keep all
 ranges of behavior possible

a special variant to the `Special` enum was introduced (ExcludeBosAndEos).
It allows decoding of tokens but excludes `bos` and `eos` tokens from the
stream. This can be used to keep the old behavior of llama-cpp-rs for
decoding streams while allowing the expected behavior that all tokens
will be decoded by default.

See https://github.com/utilityai/llama-cpp-rs/pull/856 for the
discussion about this.
---
 llama-cpp-2/src/model.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs
index 7ba4531a..2966d516 100644
--- a/llama-cpp-2/src/model.rs
+++ b/llama-cpp-2/src/model.rs
@@ -115,6 +115,10 @@ pub enum AddBos {
 pub enum Special {
     /// Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.
     Tokenize,
+    /// Allow tokenizing special and/or control tokens but excludes `bos` and `eos` tokens from the output
+    ///
+    /// This variant was introduced as a compatibility flag to address: https://github.com/utilityai/llama-cpp-rs/issues/826
+    ExcludeBosAndEos,
     /// Treat special and/or control tokens as plaintext.
     Plaintext,
 }
@@ -400,13 +404,14 @@ impl LlamaModel {
             // given that llama.cpp [documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.llama_cpp.llama_token_to_piece)
             // states that `special` controls whether special tokens are rendered we can use it as a gate to this feature as well.
             || attrs.contains(LlamaTokenAttr::Control)
-                && (token == self.token_bos() || token == self.token_eos()) && special == Special::Plaintext
+                && (token == self.token_bos() || token == self.token_eos()) && special == Special::ExcludeBosAndEos
         {
             return Ok(Vec::new());
         }
 
         let special = match special {
             Special::Tokenize => true,
+            Special::ExcludeBosAndEos => true,
             Special::Plaintext => false,
         };
 