Skip to content

Commit 58ee399

Browse files
committed
Updated chat template for Qwen3-VL to add the <think> tag again.
1 parent a1c764b commit 58ee399

File tree

2 files changed: +20 −3 lines changed

llama_cpp/llama_chat_format.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3778,22 +3778,39 @@ class Qwen3VLChatHandler(Llava15ChatHandler):
             "{{- '<|im_end|>\n' -}}"
             "{%- endif -%}"
             "{%- endfor -%}"
-            "{{- '<im_start>assistant\n' -}}"
-            # The thinking model doesn't need the <think></think> tags in this template; the model generates the tags during inference when needed
+            "{%- if add_generation_prompt -%}"
+            "{{- '<im_start>assistant\n' -}}"
+            "{%- if force_reasoning -%}"
+            "{{- '<think>\n' -}}"
+            "{%- endif -%}"
+            "{%- endif -%}"
         )

     def __init__(
         self,
         thinking_budget: int | None = None,
+        force_reasoning: bool = False,
         **kwargs,
     ):
+        """
+        Parameters:
+        - thinking_budget (int | None): # Not implemented yet
+            - int: Number of max tokens for the reasoning.
+            - None (default): Without limit.
+        - force_reasoning (bool):
+            - True: Force the reasoning in the model by adding <think> to the chat template.
+            - False (default): Don't force the reasoning.
+        """
         self.thinking_budget = thinking_budget
+        self.force_reasoning = force_reasoning
         super().__init__(**kwargs)

     def __call__(self, **kwargs):
         if self.thinking_budget is not None:
             self.extra_template_arguments["thinking_budget"] = str(self.thinking_budget)

+        self.extra_template_arguments["force_reasoning"] = self.force_reasoning
+
         llama = kwargs['llama']

         # Clear state for multiple runs

vendor/llama.cpp

0 commit comments

Comments (0)