From 1d41825851d33194d01ead6fb095b20c60046fa0 Mon Sep 17 00:00:00 2001 From: AlcoftTAO Date: Wed, 5 Nov 2025 19:26:38 +0100 Subject: [PATCH 1/5] Better Qwen3VL chat template. --- llama_cpp/llama_chat_format.py | 139 +++++++++++++++++++++------------ 1 file changed, 91 insertions(+), 48 deletions(-) diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py index bd3da64df..349d6aac2 100644 --- a/llama_cpp/llama_chat_format.py +++ b/llama_cpp/llama_chat_format.py @@ -2800,6 +2800,7 @@ def __init__(self, clip_model_path: str, verbose: bool = True): self._mtmd_cpp = mtmd_cpp self._exit_stack = ExitStack() self.mtmd_ctx: Optional[mtmd_cpp.mtmd_context_p] = None + self.extra_template_arguments: dict[str, Any] = {} if not os.path.exists(clip_model_path): raise ValueError(f"Clip model path does not exist: {clip_model_path}") @@ -2931,9 +2932,11 @@ def __call__( # Replace image URLs with media markers in the template text = template.render( messages=messages, + tools=tools, add_generation_prompt=True, eos_token=llama.detokenize([llama.token_eos()]), bos_token=llama.detokenize([llama.token_bos()]), + **self.extra_template_arguments ) # Replace image URLs in text with media markers @@ -3696,61 +3699,101 @@ def __call__(self, **kwargs): class Qwen3VLChatHandler(Llava15ChatHandler): - DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant." - - CHAT_FORMAT_BASE = ( - "{% set image_count = namespace(value=0) %}" - "{% for message in messages %}" - "{% if loop.first and message['role'] != 'system' %}" - "<|im_start|>system\n" - "{{ self.DEFAULT_SYSTEM_MESSAGE }}<|im_end|>\n" - "{% endif %}" - "<|im_start|>{{ message['role'] }}\n" - "{% if message['content'] is string %}" - "{{ message['content'] }}<|im_end|>\n" - "{% else %}" - "{% for content in message['content'] %}" - "{% if content['type'] == 'image_url' %}" - "{% if content.image_url is string %}" - "{% set image_count.value = image_count.value + 1 %}" - "Picture {{ image_count.value }}: <|vision_start|> {{ content.image_url }} <|vision_end|>" - "{% else %}" - "{% set image_count.value = image_count.value + 1 %}" - "Picture {{ image_count.value }}: <|vision_start|> {{ content.image_url.url }} <|vision_end|>" - "{% endif %}" - "{% elif content['type'] == 'text' %}" - "{{ content['text'] }}" - "{% endif %}" - "{% endfor %}" - "<|im_end|>\n" - "{% endif %}" - "{% endfor %}" + CHAT_FORMAT = ( + "{{- '<|im_start|>system\n' -}}" + "{%- if messages[0].content is string and messages[0].role == 'system' -%}" + "{{- messages[0].content -}}" + "{%- elif messages[0].role == 'system' -%}" + "{%- if 'text' in messages[0].content -%}" + "{{- messages[0].content.text -}}" + "{%- else -%}" + "{{- 'You are a helpful assistant.' -}}" + "{%- endif -%}" + "{%- endif -%}" + "{%- if tools -%}" + "{{- '\n\n' -}}" + "{{- '# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n' -}}" + "{%- for tool in tools -%}" + "{{- '\n' -}}" + "{{- tool | tojson -}}" + "{%- endfor -%}" + "{{- '\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n\n\nYou can also return a response for the user alongside a function call:\n\n\n{\"name\": , \"arguments\": }\n' -}}" + "{%- endif -%}" + #"{%- if thinking_budget -%}" + # "{{- '\n\n# Reasoning\n\nYou must generate your reasoning steps within XML tags:\n\n\n\n\n\nThe reasoning content must not exceed the ' + thinking_budget + ' tokens budget.' -}}" + #"{%- endif -%}" # Doesn't work very well, disabled for now + "{{- '<|im_end|>\n' -}}" + "{%- set image_count = namespace(value=0) -%}" + #"{%- set video_count = namespace(value=0) -%}" + "{%- for message in messages -%}" + "{%- if message.role == 'tool' -%}" + "{{- '<|im_start|>user\n\n' -}}" + "{%- elif message.role != 'system' -%}" + "{{- '<|im_start|>' + message.role + '\n' -}}" + "{%- endif -%}" + "{%- if message.content is string and message.role != 'system' -%}" + "{{- message.content -}}" + "{%- elif message.role != 'system' -%}" + "{%- for content in message.content -%}" + "{%- if 'image_url' in content -%}" + "{%- set image_count.value = image_count.value + 1 -%}" + "{%- if add_vision_id -%}" + "{{- 'Picture ' + image_count.value + ': ' -}}" + "{%- endif -%}" + "{{- '<|vision_start|>' -}}" + "{%- if content.image_url is string -%}" + "{{- content.image_url -}}" + "{%- else -%}" + "{{- content.image_url.url -}}" + "{%- endif -%}" + "{{- '<|vision_end|>' -}}" + "{%- endif -%}" + # Video not supported yet + "{%- if 'text' in content -%}" + "{{- content.text -}}" + "{%- endif -%}" + "{%- endfor -%}" + "{%- endif -%}" + "{%- if message.role == 'assistant' -%}" + "{%- if message.tool_calls -%}" + "{%- for tool_call in message.tool_calls -%}" + "{%- if (loop.first and message.content) or (not loop.first) -%}" + "{{- '\n' -}}" + "{%- endif -%}" + "{%- if tool_call.function -%}" + "{%- set tool_call = tool_call.function -%}" + "{%- endif -%}" + "{{- '\n{\"name\": \"' + tool_call.name + '\", \"arguments\": ' -}}" + "{%- if tool_call.arguments is string -%}" + "{{- tool_call.arguments -}}" + "{%- else -%}" + "{{- tool_call.arguments | tojson -}}" + "{%- endif -%}" + "{{- '}\n' -}}" + "{%- endfor -%}" + "{%- endif -%}" + "{%- elif message.role == 'tool' -%}" + "{{- '' -}}" + "{%- elif message.role != 'system' -%}" + "{{- '<|im_end|>\n' -}}" + "{%- endif -%}" + "{%- endfor -%}" + "{{- 'assistant\n' -}}" + # The thinking model doesn't need the tags in this template; the model generates the tags during inference when needed ) def __init__( self, - use_think_prompt: bool = True, - verbose: bool = True, + thinking_budget: int | None = None, **kwargs, ): - """ - Parameters: - - use_think_prompt (bool): - - True (default): Use the '' prompt (for Thinking version). - - False: Do not use '' (for Instruct version). - - verbose (bool): Whether to print verbose logs. - """ - self.use_think_prompt = use_think_prompt - self.verbose = verbose - - if self.use_think_prompt: - self.CHAT_FORMAT = self.CHAT_FORMAT_BASE + "<|im_start|>assistant\n\n" - else: - self.CHAT_FORMAT = self.CHAT_FORMAT_BASE + "<|im_start|>assistant\n" - + self.thinking_budget = thinking_budget super().__init__(**kwargs) def __call__(self, **kwargs): + if self.thinking_budget is not None: + self.extra_template_arguments["thinking_budget"] = str(self.thinking_budget) + llama = kwargs['llama'] # Clear state for multiple runs @@ -3770,9 +3813,9 @@ def __call__(self, **kwargs): messages = kwargs.get('messages', []) try: image_count = len(self.get_image_urls(messages)) - print(f"Qwen3VLHandler(think={self.use_think_prompt}) - Cleared state, processing {image_count} images", file=sys.stderr) + print(f"Qwen3VLHandler(thinking_budget={self.thinking_budget}) - Cleared state, processing {image_count} images", file=sys.stderr) except Exception: - print(f"Qwen3VLHandler(think={self.use_think_prompt}) - Cleared state", file=sys.stderr) + print(f"Qwen3VLHandler(thinking_budget={self.thinking_budget}) - Cleared state", file=sys.stderr) # Use parent implementation return super().__call__(**kwargs) From 24d72de443fc3072f61395e472c1d52ee53a5b1d Mon Sep 17 00:00:00 2001 From: JamePeng Date: Thu, 6 Nov 2025 19:09:27 +0800 Subject: [PATCH 2/5] Update Submodule vendor/llama.cpp 48bd265..b7f9010 --- vendor/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 48bd26501..b7f9010d2 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 48bd26501b08a3f0bff1249db47f313641f7bebb +Subproject commit b7f9010d24766792d8887c227a883ed3b315d2be From 58ee3997dc3ad9a5e56505aa95070391aca439ca Mon Sep 17 00:00:00 2001 From: AlcoftTAO Date: Sat, 8 Nov 2025 00:21:57 +0100 Subject: [PATCH 3/5] Updated chat template for Qwen3-VL to add the tag again. --- llama_cpp/llama_chat_format.py | 21 +++++++++++++++++++-- vendor/llama.cpp | 2 +- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py index 349d6aac2..fd9d6a820 100644 --- a/llama_cpp/llama_chat_format.py +++ b/llama_cpp/llama_chat_format.py @@ -3778,22 +3778,39 @@ class Qwen3VLChatHandler(Llava15ChatHandler): "{{- '<|im_end|>\n' -}}" "{%- endif -%}" "{%- endfor -%}" - "{{- 'assistant\n' -}}" - # The thinking model doesn't need the tags in this template; the model generates the tags during inference when needed + "{%- if add_generation_prompt -%}" + "{{- 'assistant\n' -}}" + "{%- if force_reasoning -%}" + "{{- '\n' -}}" + "{%- endif -%}" + "{%- endif -%}" ) def __init__( self, thinking_budget: int | None = None, + force_reasoning: bool = False, **kwargs, ): + """ + Parameters: + - thinking_budget (int | None): # Not implemented yet + - int: Number of max tokens for the reasoning. + - None (default): Without limit. + - force_reasoning (bool): + - True: Force the reasoning in the model by adding to the chat template. + - False (default): Don't force the reasoning. + """ self.thinking_budget = thinking_budget + self.force_reasoning = force_reasoning super().__init__(**kwargs) def __call__(self, **kwargs): if self.thinking_budget is not None: self.extra_template_arguments["thinking_budget"] = str(self.thinking_budget) + self.extra_template_arguments["force_reasoning"] = self.force_reasoning + llama = kwargs['llama'] # Clear state for multiple runs diff --git a/vendor/llama.cpp b/vendor/llama.cpp index b7f9010d2..48bd26501 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit b7f9010d24766792d8887c227a883ed3b315d2be +Subproject commit 48bd26501b08a3f0bff1249db47f313641f7bebb From a749dfaec847bca62fc18ae6402bd0e1d6266d27 Mon Sep 17 00:00:00 2001 From: AlcoftTAO Date: Sat, 8 Nov 2025 02:00:23 +0100 Subject: [PATCH 4/5] Deleted 'thinking_budget' because it's not implemented yet. --- llama_cpp/llama_chat_format.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py index fd9d6a820..2b37e3334 100644 --- a/llama_cpp/llama_chat_format.py +++ b/llama_cpp/llama_chat_format.py @@ -3719,9 +3719,6 @@ class Qwen3VLChatHandler(Llava15ChatHandler): "{%- endfor -%}" "{{- '\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n\n\nYou can also return a response for the user alongside a function call:\n\n\n{\"name\": , \"arguments\": }\n' -}}" "{%- endif -%}" - #"{%- if thinking_budget -%}" - # "{{- '\n\n# Reasoning\n\nYou must generate your reasoning steps within XML tags:\n\n\n\n\n\nThe reasoning content must not exceed the ' + thinking_budget + ' tokens budget.' -}}" - #"{%- endif -%}" # Doesn't work very well, disabled for now "{{- '<|im_end|>\n' -}}" "{%- set image_count = namespace(value=0) -%}" #"{%- set video_count = namespace(value=0) -%}" @@ -3788,29 +3785,20 @@ class Qwen3VLChatHandler(Llava15ChatHandler): def __init__( self, - thinking_budget: int | None = None, force_reasoning: bool = False, **kwargs, ): """ Parameters: - - thinking_budget (int | None): # Not implemented yet - - int: Number of max tokens for the reasoning. - - None (default): Without limit. - force_reasoning (bool): - True: Force the reasoning in the model by adding to the chat template. - False (default): Don't force the reasoning. """ - self.thinking_budget = thinking_budget self.force_reasoning = force_reasoning super().__init__(**kwargs) def __call__(self, **kwargs): - if self.thinking_budget is not None: - self.extra_template_arguments["thinking_budget"] = str(self.thinking_budget) - self.extra_template_arguments["force_reasoning"] = self.force_reasoning - llama = kwargs['llama'] # Clear state for multiple runs @@ -3830,9 +3818,9 @@ def __call__(self, **kwargs): messages = kwargs.get('messages', []) try: image_count = len(self.get_image_urls(messages)) - print(f"Qwen3VLHandler(thinking_budget={self.thinking_budget}) - Cleared state, processing {image_count} images", file=sys.stderr) + print(f"Qwen3VLHandler(force_reasoning={self.force_reasoning}) - Cleared state, processing {image_count} images", file=sys.stderr) except Exception: - print(f"Qwen3VLHandler(thinking_budget={self.thinking_budget}) - Cleared state", file=sys.stderr) + print(f"Qwen3VLHandler(force_reasoning={self.force_reasoning}) - Cleared state", file=sys.stderr) # Use parent implementation return super().__call__(**kwargs) From 14d14ccbbbd68de2a4de320542c8e5324a97234b Mon Sep 17 00:00:00 2001 From: AlcoftTAO Date: Sat, 8 Nov 2025 03:09:52 +0100 Subject: [PATCH 5/5] Added 'add_vision_id' to the chat template. --- llama_cpp/llama_chat_format.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py index 2b37e3334..e1a447550 100644 --- a/llama_cpp/llama_chat_format.py +++ b/llama_cpp/llama_chat_format.py @@ -3735,7 +3735,9 @@ class Qwen3VLChatHandler(Llava15ChatHandler): "{%- if 'image_url' in content -%}" "{%- set image_count.value = image_count.value + 1 -%}" "{%- if add_vision_id -%}" - "{{- 'Picture ' + image_count.value + ': ' -}}" + "{{- 'Picture ' -}}" + "{{- image_count.value | string -}}" + "{{- ': ' -}}" "{%- endif -%}" "{{- '<|vision_start|>' -}}" "{%- if content.image_url is string -%}" @@ -3786,6 +3788,7 @@ class Qwen3VLChatHandler(Llava15ChatHandler): def __init__( self, force_reasoning: bool = False, + add_vision_id: bool = True, **kwargs, ): """ @@ -3793,12 +3796,19 @@ def __init__( - force_reasoning (bool): - True: Force the reasoning in the model by adding to the chat template. - False (default): Don't force the reasoning. + - add_vision_id (bool): + - True (default): Count all the images. Recommended for multi-image. + - False: Doesn't count the images. Can save tokens with single-image. """ self.force_reasoning = force_reasoning + self.add_vision_id = add_vision_id + super().__init__(**kwargs) def __call__(self, **kwargs): self.extra_template_arguments["force_reasoning"] = self.force_reasoning + self.extra_template_arguments["add_vision_id"] = self.add_vision_id + llama = kwargs['llama'] # Clear state for multiple runs