From 1d41825851d33194d01ead6fb095b20c60046fa0 Mon Sep 17 00:00:00 2001
From: AlcoftTAO <alfonso.sanchez.jimenez9@gmail.com>
Date: Wed, 5 Nov 2025 19:26:38 +0100
Subject: [PATCH 1/5] Better Qwen3VL chat template.

---
 llama_cpp/llama_chat_format.py | 139 +++++++++++++++++++++------------
 1 file changed, 91 insertions(+), 48 deletions(-)
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index bd3da64df..349d6aac2 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -2800,6 +2800,7 @@ def __init__(self, clip_model_path: str, verbose: bool = True):
         self._mtmd_cpp = mtmd_cpp
         self._exit_stack = ExitStack()
         self.mtmd_ctx: Optional[mtmd_cpp.mtmd_context_p] = None
+        self.extra_template_arguments: dict[str, Any] = {}
 
         if not os.path.exists(clip_model_path):
             raise ValueError(f"Clip model path does not exist: {clip_model_path}")
@@ -2931,9 +2932,11 @@ def __call__(
         # Replace image URLs with media markers in the template
         text = template.render(
             messages=messages,
+            tools=tools,
             add_generation_prompt=True,
             eos_token=llama.detokenize([llama.token_eos()]),
             bos_token=llama.detokenize([llama.token_bos()]),
+            **self.extra_template_arguments
         )
 
         # Replace image URLs in text with media markers
@@ -3696,61 +3699,101 @@ def __call__(self, **kwargs):
 
 
 class Qwen3VLChatHandler(Llava15ChatHandler):
-    DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant."
-
-    CHAT_FORMAT_BASE = (
-        "{% set image_count = namespace(value=0) %}"
-        "{% for message in messages %}"
-        "{% if loop.first and message['role'] != 'system' %}"
-        "<|im_start|>system\n"
-        "{{ self.DEFAULT_SYSTEM_MESSAGE }}<|im_end|>\n"
-        "{% endif %}"
-        "<|im_start|>{{ message['role'] }}\n"
-        "{% if message['content'] is string %}"
-        "{{ message['content'] }}<|im_end|>\n"
-        "{% else %}"
-        "{% for content in message['content'] %}"
-        "{% if content['type'] == 'image_url' %}"
-        "{% if content.image_url is string %}"
-        "{% set image_count.value = image_count.value + 1 %}"
-        "Picture {{ image_count.value }}: <|vision_start|> {{ content.image_url }} <|vision_end|>"
-        "{% else %}"
-        "{% set image_count.value = image_count.value + 1 %}"
-        "Picture {{ image_count.value }}: <|vision_start|> {{ content.image_url.url }} <|vision_end|>"
-        "{% endif %}"
-        "{% elif content['type'] == 'text' %}"
-        "{{ content['text'] }}"
-        "{% endif %}"
-        "{% endfor %}"
-        "<|im_end|>\n"
-        "{% endif %}"
-        "{% endfor %}"
+    CHAT_FORMAT = (
+        "{{- '<|im_start|>system\n' -}}"
+        "{%- if messages[0].content is string and messages[0].role == 'system' -%}"
+            "{{- messages[0].content -}}"
+        "{%- elif messages[0].role == 'system' -%}"
+            "{%- if 'text' in messages[0].content -%}"
+                "{{- messages[0].content.text -}}"
+            "{%- else -%}"
+                "{{- 'You are a helpful assistant.' -}}"
+            "{%- endif -%}"
+        "{%- endif -%}"
+        "{%- if tools -%}"
+            "{{- '\n\n' -}}"
+            "{{- '# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>' -}}"
+            "{%- for tool in tools -%}"
+                "{{- '\n' -}}"
+                "{{- tool | tojson -}}"
+            "{%- endfor -%}"
+            "{{- '\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <arguments-json-object>}\n</tool_call>\n\nYou can also return a response for the user alongside a function call:\n<response-for-user>\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <arguments-json-object>}\n</tool_call>' -}}"
+        "{%- endif -%}"
+        #"{%- if thinking_budget -%}"
+        #    "{{- '\n\n# Reasoning\n\nYou must generate your reasoning steps within <think></think> XML tags:\n<think>\n<reasoning-content>\n</think>\n<final-response>\n\nThe reasoning content must not exceed the ' + thinking_budget + ' tokens budget.' -}}"
+        #"{%- endif -%}"  # Doesn't work very well, disabled for now
+        "{{- '<|im_end|>\n' -}}"
+        "{%- set image_count = namespace(value=0) -%}"
+        #"{%- set video_count = namespace(value=0) -%}"
+        "{%- for message in messages -%}"
+            "{%- if message.role == 'tool' -%}"
+                "{{- '<|im_start|>user\n<tool_response>\n' -}}"
+            "{%- elif message.role != 'system' -%}"
+                "{{- '<|im_start|>' + message.role + '\n' -}}"
+            "{%- endif -%}"
+            "{%- if message.content is string and message.role != 'system' -%}"
+                "{{- message.content -}}"
+            "{%- elif message.role != 'system' -%}"
+                "{%- for content in message.content -%}"
+                    "{%- if 'image_url' in content -%}"
+                        "{%- set image_count.value = image_count.value + 1 -%}"
+                        "{%- if add_vision_id -%}"
+                            "{{- 'Picture ' + image_count.value + ': ' -}}"
+                        "{%- endif -%}"
+                        "{{- '<|vision_start|>' -}}"
+                        "{%- if content.image_url is string -%}"
+                            "{{- content.image_url -}}"
+                        "{%- else -%}"
+                            "{{- content.image_url.url -}}"
+                        "{%- endif -%}"
+                        "{{- '<|vision_end|>' -}}"
+                    "{%- endif -%}"
+                    # Video not supported yet
+                    "{%- if 'text' in content -%}"
+                        "{{- content.text -}}"
+                    "{%- endif -%}"
+                "{%- endfor -%}"
+            "{%- endif -%}"
+            "{%- if message.role == 'assistant' -%}"
+                "{%- if message.tool_calls -%}"
+                    "{%- for tool_call in message.tool_calls -%}"
+                        "{%- if (loop.first and message.content) or (not loop.first) -%}"
+                            "{{- '\n' -}}"
+                        "{%- endif -%}"
+                        "{%- if tool_call.function -%}"
+                            "{%- set tool_call = tool_call.function -%}"
+                        "{%- endif -%}"
+                        "{{- '<tool_call>\n{\"name\": \"' + tool_call.name + '\", \"arguments\": ' -}}"
+                        "{%- if tool_call.arguments is string -%}"
+                            "{{- tool_call.arguments -}}"
+                        "{%- else -%}"
+                            "{{- tool_call.arguments | tojson -}}"
+                        "{%- endif -%}"
+                        "{{- '}\n</tool_call>' -}}"
+                    "{%- endfor -%}"
+                "{%- endif -%}"
+            "{%- elif message.role == 'tool' -%}"
+                "{{- '</tool_response>' -}}"
+            "{%- elif message.role != 'system' -%}"
+                "{{- '<|im_end|>\n' -}}"
+            "{%- endif -%}"
+        "{%- endfor -%}"
+        "{{- '<im_start>assistant\n' -}}"
+        # The thinking model doesn't need the <think></think> tags in this template; the model generates the tags during inference when needed
     )
 
     def __init__(
         self,
-        use_think_prompt: bool = True,
-        verbose: bool = True,
+        thinking_budget: int | None = None,
         **kwargs,
     ):
-        """
-        Parameters:
-        - use_think_prompt (bool):
-            - True (default): Use the '<think>' prompt (for Thinking version).
-            - False: Do not use '<think>'              (for Instruct version).
-        - verbose (bool): Whether to print verbose logs.
-        """
-        self.use_think_prompt = use_think_prompt
-        self.verbose = verbose
-
-        if self.use_think_prompt:
-            self.CHAT_FORMAT = self.CHAT_FORMAT_BASE + "<|im_start|>assistant\n<think>\n"
-        else:
-            self.CHAT_FORMAT = self.CHAT_FORMAT_BASE + "<|im_start|>assistant\n"
-
+        self.thinking_budget = thinking_budget
         super().__init__(**kwargs)
 
     def __call__(self, **kwargs):
+        if self.thinking_budget is not None:
+            self.extra_template_arguments["thinking_budget"] = str(self.thinking_budget)
+
         llama = kwargs['llama']
 
         # Clear state for multiple runs
@@ -3770,9 +3813,9 @@ def __call__(self, **kwargs):
             messages = kwargs.get('messages', [])
             try:
                 image_count = len(self.get_image_urls(messages))
-                print(f"Qwen3VLHandler(think={self.use_think_prompt}) - Cleared state, processing {image_count} images", file=sys.stderr)
+                print(f"Qwen3VLHandler(thinking_budget={self.thinking_budget}) - Cleared state, processing {image_count} images", file=sys.stderr)
             except Exception:
-                print(f"Qwen3VLHandler(think={self.use_think_prompt}) - Cleared state", file=sys.stderr)
+                print(f"Qwen3VLHandler(thinking_budget={self.thinking_budget}) - Cleared state", file=sys.stderr)
 
         # Use parent implementation
         return super().__call__(**kwargs)

From 24d72de443fc3072f61395e472c1d52ee53a5b1d Mon Sep 17 00:00:00 2001
From: JamePeng <jame_peng@sina.com>
Date: Thu, 6 Nov 2025 19:09:27 +0800
Subject: [PATCH 2/5] Update Submodule vendor/llama.cpp 48bd265..b7f9010

---
 vendor/llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 48bd26501..b7f9010d2 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 48bd26501b08a3f0bff1249db47f313641f7bebb
+Subproject commit b7f9010d24766792d8887c227a883ed3b315d2be

From 58ee3997dc3ad9a5e56505aa95070391aca439ca Mon Sep 17 00:00:00 2001
From: AlcoftTAO <alfonso.sanchez.jimenez9@gmail.com>
Date: Sat, 8 Nov 2025 00:21:57 +0100
Subject: [PATCH 3/5] Updated chat template for Qwen3-VL to add the <think> tag
 again.

---
 llama_cpp/llama_chat_format.py | 21 +++++++++++++++++++--
 vendor/llama.cpp               |  2 +-
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index 349d6aac2..fd9d6a820 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -3778,22 +3778,39 @@ class Qwen3VLChatHandler(Llava15ChatHandler):
                 "{{- '<|im_end|>\n' -}}"
             "{%- endif -%}"
         "{%- endfor -%}"
-        "{{- '<im_start>assistant\n' -}}"
-        # The thinking model doesn't need the <think></think> tags in this template; the model generates the tags during inference when needed
+        "{%- if add_generation_prompt -%}"
+            "{{- '<|im_start|>assistant\n' -}}"  # ChatML turn opener; the special token is '<|im_start|>' (with pipes), same as every other turn in this template
+            "{%- if force_reasoning -%}"
+                "{{- '<think>\n' -}}"  # pre-open the reasoning block so generation starts inside <think>
+            "{%- endif -%}"
+        "{%- endif -%}"
     )
 
     def __init__(
         self,
         thinking_budget: int | None = None,
+        force_reasoning: bool = False,
         **kwargs,
     ):
+        """
+        Parameters:
+        - thinking_budget (int | None):  # Not implemented yet
+            - int: Number of max tokens for the reasoning.
+            - None (default): Without limit.
+        - force_reasoning (bool):
+            - True: Force the reasoning in the model by adding <think> to the chat template.
+            - False (default): Don't force the reasoning.
+        """
         self.thinking_budget = thinking_budget
+        self.force_reasoning = force_reasoning
         super().__init__(**kwargs)
 
     def __call__(self, **kwargs):
         if self.thinking_budget is not None:
             self.extra_template_arguments["thinking_budget"] = str(self.thinking_budget)
 
+        self.extra_template_arguments["force_reasoning"] = self.force_reasoning
+
         llama = kwargs['llama']
 
         # Clear state for multiple runs
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index b7f9010d2..48bd26501 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit b7f9010d24766792d8887c227a883ed3b315d2be
+Subproject commit 48bd26501b08a3f0bff1249db47f313641f7bebb

From a749dfaec847bca62fc18ae6402bd0e1d6266d27 Mon Sep 17 00:00:00 2001
From: AlcoftTAO <alfonso.sanchez.jimenez9@gmail.com>
Date: Sat, 8 Nov 2025 02:00:23 +0100
Subject: [PATCH 4/5] Deleted 'thinking_budget' because it's not implemented
 yet.

---
 llama_cpp/llama_chat_format.py | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index fd9d6a820..2b37e3334 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -3719,9 +3719,6 @@ class Qwen3VLChatHandler(Llava15ChatHandler):
             "{%- endfor -%}"
             "{{- '\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <arguments-json-object>}\n</tool_call>\n\nYou can also return a response for the user alongside a function call:\n<response-for-user>\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <arguments-json-object>}\n</tool_call>' -}}"
         "{%- endif -%}"
-        #"{%- if thinking_budget -%}"
-        #    "{{- '\n\n# Reasoning\n\nYou must generate your reasoning steps within <think></think> XML tags:\n<think>\n<reasoning-content>\n</think>\n<final-response>\n\nThe reasoning content must not exceed the ' + thinking_budget + ' tokens budget.' -}}"
-        #"{%- endif -%}"  # Doesn't work very well, disabled for now
         "{{- '<|im_end|>\n' -}}"
         "{%- set image_count = namespace(value=0) -%}"
         #"{%- set video_count = namespace(value=0) -%}"
@@ -3788,29 +3785,20 @@ class Qwen3VLChatHandler(Llava15ChatHandler):
 
     def __init__(
         self,
-        thinking_budget: int | None = None,
         force_reasoning: bool = False,
         **kwargs,
     ):
         """
         Parameters:
-        - thinking_budget (int | None):  # Not implemented yet
-            - int: Number of max tokens for the reasoning.
-            - None (default): Without limit.
         - force_reasoning (bool):
             - True: Force the reasoning in the model by adding <think> to the chat template.
             - False (default): Don't force the reasoning.
         """
-        self.thinking_budget = thinking_budget
         self.force_reasoning = force_reasoning
         super().__init__(**kwargs)
 
     def __call__(self, **kwargs):
-        if self.thinking_budget is not None:
-            self.extra_template_arguments["thinking_budget"] = str(self.thinking_budget)
-
         self.extra_template_arguments["force_reasoning"] = self.force_reasoning
-
         llama = kwargs['llama']
 
         # Clear state for multiple runs
@@ -3830,9 +3818,9 @@ def __call__(self, **kwargs):
             messages = kwargs.get('messages', [])
             try:
                 image_count = len(self.get_image_urls(messages))
-                print(f"Qwen3VLHandler(thinking_budget={self.thinking_budget}) - Cleared state, processing {image_count} images", file=sys.stderr)
+                print(f"Qwen3VLHandler(force_reasoning={self.force_reasoning}) - Cleared state, processing {image_count} images", file=sys.stderr)
             except Exception:
-                print(f"Qwen3VLHandler(thinking_budget={self.thinking_budget}) - Cleared state", file=sys.stderr)
+                print(f"Qwen3VLHandler(force_reasoning={self.force_reasoning}) - Cleared state", file=sys.stderr)
 
         # Use parent implementation
         return super().__call__(**kwargs)

From 14d14ccbbbd68de2a4de320542c8e5324a97234b Mon Sep 17 00:00:00 2001
From: AlcoftTAO <alfonso.sanchez.jimenez9@gmail.com>
Date: Sat, 8 Nov 2025 03:09:52 +0100
Subject: [PATCH 5/5] Added 'add_vision_id' to the chat template.

---
 llama_cpp/llama_chat_format.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index 2b37e3334..e1a447550 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -3735,7 +3735,9 @@ class Qwen3VLChatHandler(Llava15ChatHandler):
                     "{%- if 'image_url' in content -%}"
                         "{%- set image_count.value = image_count.value + 1 -%}"
                         "{%- if add_vision_id -%}"
-                            "{{- 'Picture ' + image_count.value + ': ' -}}"
+                            "{{- 'Picture ' -}}"
+                            "{{- image_count.value | string -}}"
+                            "{{- ': ' -}}"
                         "{%- endif -%}"
                         "{{- '<|vision_start|>' -}}"
                         "{%- if content.image_url is string -%}"
@@ -3786,6 +3788,7 @@ class Qwen3VLChatHandler(Llava15ChatHandler):
     def __init__(
         self,
         force_reasoning: bool = False,
+        add_vision_id: bool = True,
         **kwargs,
     ):
         """
@@ -3793,12 +3796,19 @@ def __init__(
         - force_reasoning (bool):
             - True: Force the reasoning in the model by adding <think> to the chat template.
             - False (default): Don't force the reasoning.
+        - add_vision_id (bool):
+            - True (default): Prefix each image with "Picture N: " (N is the running image index). Recommended for multi-image prompts.
+            - False: Omit the "Picture N: " prefix. Can save tokens with single-image prompts.
         """
         self.force_reasoning = force_reasoning
+        self.add_vision_id = add_vision_id
+
         super().__init__(**kwargs)
 
     def __call__(self, **kwargs):
         self.extra_template_arguments["force_reasoning"] = self.force_reasoning
+        self.extra_template_arguments["add_vision_id"] = self.add_vision_id
+
         llama = kwargs['llama']
 
         # Clear state for multiple runs