@@ -3735,7 +3735,9 @@ class Qwen3VLChatHandler(Llava15ChatHandler):
37353735 "{%- if 'image_url' in content -%}"
37363736 "{%- set image_count.value = image_count.value + 1 -%}"
37373737 "{%- if add_vision_id -%}"
3738- "{{- 'Picture ' + image_count.value + ': ' -}}"
3738+ "{{- 'Picture ' -}}"
3739+ "{{- image_count.value | string -}}"
3740+ "{{- ': ' -}}"
37393741 "{%- endif -%}"
37403742 "{{- '<|vision_start|>' -}}"
37413743 "{%- if content.image_url is string -%}"
@@ -3786,19 +3788,27 @@ class Qwen3VLChatHandler(Llava15ChatHandler):
def __init__(
    self,
    force_reasoning: bool = False,
    add_vision_id: bool = True,
    **kwargs,
):
    """
    Store the chat-template switches, then defer to the parent initializer.

    Parameters:
    - force_reasoning (bool):
        - True: Force the reasoning in the model by adding <think> to the chat template.
        - False (default): Reasoning is not forced.
    - add_vision_id (bool):
        - True (default): The chat template labels every image with a
          running "Picture N: " prefix. Recommended for multi-image prompts.
        - False: No label is emitted, which can save tokens for single-image prompts.
    - **kwargs: Passed through unchanged to the parent class initializer.
    """
    # Both switches are plain instance attributes, assigned before the
    # parent initializer runs.
    self.add_vision_id = add_vision_id
    self.force_reasoning = force_reasoning

    super().__init__(**kwargs)
37993807
38003808 def __call__ (self , ** kwargs ):
38013809 self .extra_template_arguments ["force_reasoning" ] = self .force_reasoning
3810+ self .extra_template_arguments ["add_vision_id" ] = self .add_vision_id
3811+
38023812 llama = kwargs ['llama' ]
38033813
38043814 # Clear state for multiple runs
0 commit comments