JamePeng · JamePeng · Mar 1, 2026 · Feb 26, 2026 · Feb 27, 2026 · Feb 27, 2026
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
@@ -4669,7 +4669,6 @@ def __init__(
         self,
         force_reasoning: bool = False,
         add_vision_id: bool = True,
-        image_min_tokens: int = -1,
         **kwargs,
     ):
         """
@@ -4680,20 +4679,13 @@ def __init__(
         - add_vision_id (bool):
             - True (default): Count all the images. Recommended for multi-image.
             - False: Doesn't count the images. Can save tokens with single-image.
-        - image_min_tokens (int):
-            It only takes effect when the value is greater than zero. the default value is -1 (i.e., using the default parameters in the model's preprocessor_config.json).
-            Note: Qwen-VL models require at minimum 1024 image tokens to function correctly on bbox grounding tasks
         """
+        super().__init__(**kwargs)
         self.force_reasoning = force_reasoning
-        self.add_vision_id = add_vision_id
-        self.image_min_tokens = image_min_tokens
-
-        super().__init__(image_min_tokens=self.image_min_tokens, **kwargs)
+        self.extra_template_arguments["force_reasoning"] = force_reasoning
+        self.extra_template_arguments["add_vision_id"] = add_vision_id
 
     def __call__(self, **kwargs):
-        self.extra_template_arguments["force_reasoning"] = self.force_reasoning
-        self.extra_template_arguments["add_vision_id"] = self.add_vision_id
-
         llama = kwargs['llama']
 
         if hasattr(llama, 'input_ids'):
@@ -4705,6 +4697,202 @@ def __call__(self, **kwargs):
         # Use parent implementation
         return super().__call__(**kwargs)
 
+class Qwen35ChatHandler(MTMDChatHandler):  # Multimodal chat handler whose prompt is rendered from the ChatML-style Jinja template below
+    CHAT_FORMAT = (
+        "{%- set image_count = namespace(value=0) -%}"
+        "{%- set video_count = namespace(value=0) -%}"
+        "{%- macro render_content(content, do_vision_count, is_system_content=false) -%}"  # Renders str / list-of-parts content; counts images only when do_vision_count is true
+        "    {%- if content is string -%}"
+        "        {{- content -}}"
+        "    {%- elif content is iterable and content is not mapping -%}"
+        "        {%- for item in content -%}"
+        "            {%- if 'image_url' in item or item.type == 'image_url' -%}"
+        "                {%- if is_system_content -%}"
+        "                    {{- raise_exception('System message cannot contain images.') -}}"
+        "                {%- endif -%}"
+        "                {%- if do_vision_count -%}"
+        "                    {%- set image_count.value = image_count.value + 1 -%}"
+        "                {%- endif -%}"
+        "                {%- if add_vision_id -%}"
+        "                    {{- 'Picture ' -}}"
+        "                    {{- image_count.value | string -}}"
+        "                    {{- ': ' -}}"
+        "                {%- endif -%}"
+        "                {{- '<|vision_start|>' -}}"
+        "                {%- if item.image_url is string -%}"
+        "                    {{- item.image_url -}}"
+        "                {%- else -%}"
+        "                    {{- item.image_url.url -}}"
+        "                {%- endif -%}"
+        "                {{- '<|vision_end|>' -}}"
+        "            {%- elif 'video' in item -%}"
+        "                {{- raise_exception('llama.cpp does not currently support video.') -}}"  # Video not supported, raise exception; the rest of this branch is intentionally unreachable
+        "                {%- if is_system_content -%}"
+        "                    {{- raise_exception('System message cannot contain videos.') -}}"
+        "                {%- endif -%}"
+        "                {%- if do_vision_count -%}"
+        "                    {%- set video_count.value = video_count.value + 1 -%}"
+        "                {%- endif -%}"
+        "                {%- if add_vision_id -%}"
+        "                    {{- 'Video ' ~ video_count.value ~ ': ' -}}"
+        "                {%- endif -%}"
+        "                {{- '<|vision_start|>' -}}"
+        "                {{- item.video -}}"
+        "                {{- '<|vision_end|>' -}}"
+        "            {%- elif 'text' in item -%}"
+        "                {{- item.text -}}"
+        "            {%- else -%}"
+        "                {{- raise_exception('Unexpected item type in content.') -}}"
+        "            {%- endif -%}"
+        "        {%- endfor -%}"
+        "    {%- elif content is none or content is undefined -%}"
+        "        {{- '' -}}"
+        "    {%- else -%}"
+        "        {{- raise_exception('Unexpected content type.') -}}"
+        "    {%- endif -%}"
+        "{%- endmacro -%}"
+        "{%- if not messages -%}"
+        "    {{- raise_exception('No messages provided.') -}}"
+        "{%- endif -%}"
+        "{%- if tools and tools is iterable and tools is not mapping -%}"  # With tools: emit a system block describing them, then append the user's system prompt (if any)
+        "    {{- '<|im_start|>system\n' -}}"
+        "    {{- '# Tools\n\nYou have access to the following functions:\n\n<tools>' -}}"
+        "    {%- for tool in tools -%}"
+        "        {{- '\n' -}}"
+        "        {{- tool | tojson -}}"
+        "    {%- endfor -%}"
+        "    {{- '\n</tools>' -}}"
+        "    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' -}}"
+        "    {%- if messages[0].role == 'system' -%}"
+        "        {%- set content = render_content(messages[0].content, false, true) | trim -%}"
+        "        {%- if content -%}"
+        "            {{- '\n\n' + content -}}"
+        "        {%- endif -%}"
+        "    {%- endif -%}"
+        "    {{- '<|im_end|>\n' -}}"
+        "{%- elif messages[0].role == 'system' -%}"
+        "    {%- set content = render_content(messages[0].content, false, true) -%}"
+        "    {{- '<|im_start|>system\n' + content + '<|im_end|>\n' -}}"
+        "{%- endif -%}"
+        "{%- set ns = namespace(multi_step_tool=true, last_query_index=messages | length - 1) -%}"
+        "{%- for message in messages[::-1] -%}"  # Walk backwards to find the last user message that is a real query rather than a wrapped tool response
+        "    {%- set index = messages | length - 1 - loop.index0 -%}"
+        "    {%- if ns.multi_step_tool and message.role == 'user' -%}"
+        "        {%- set content = render_content(message.content, false) | trim -%}"
+        "        {%- if not (content.startswith('<tool_response>') and content.endswith('</tool_response>')) -%}"
+        "            {%- set ns.multi_step_tool = false -%}"
+        "            {%- set ns.last_query_index = index -%}"
+        "        {%- endif -%}"
+        "    {%- endif -%}"
+        "{%- endfor -%}"
+        "{%- if ns.multi_step_tool -%}"
+        "    {{- raise_exception('No user query found in messages.') -}}"
+        "{%- endif -%}"
+        "{%- for message in messages -%}"
+        "    {%- set content = render_content(message.content, true) | trim -%}"
+        "    {%- if message.role == 'system' -%}"
+        "        {%- if not loop.first -%}"
+        "            {{- raise_exception('System message must be at the beginning.') -}}"
+        "        {%- endif -%}"
+        "    {%- elif message.role == 'user' -%}"
+        "        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>\n' -}}"
+        "    {%- elif message.role == 'assistant' -%}"
+        "        {%- set reasoning_content = '' -%}"
+        "        {%- if message.reasoning_content is string -%}"
+        "            {%- set reasoning_content = message.reasoning_content -%}"
+        "        {%- elif '</think>' in content -%}"
+        "            {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') -%}"
+        "            {%- set content = content.split('</think>')[-1].lstrip('\n') -%}"
+        "        {%- endif -%}"
+        "        {%- set reasoning_content = reasoning_content | trim -%}"
+        "        {%- if loop.index0 > ns.last_query_index -%}"  # Reasoning is re-emitted only for assistant turns after the last real user query
+        "            {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content -}}"
+        "        {%- else -%}"
+        "            {{- '<|im_start|>' + message.role + '\n' + content -}}"
+        "        {%- endif -%}"
+        "        {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping -%}"
+        "            {%- for tool_call in message.tool_calls -%}"  # Must iterate the same `tool_calls` list that the guard above validated
+        "                {%- if tool_call.function is defined -%}"
+        "                    {%- set tool_call = tool_call.function -%}"
+        "                {%- endif -%}"
+        "                {%- if loop.first -%}"
+        "                    {%- if content | trim -%}"
+        "                        {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' -}}"
+        "                    {%- else -%}"
+        "                        {{- '<tool_call>\n<function=' + tool_call.name + '>\n' -}}"
+        "                    {%- endif -%}"
+        "                {%- else -%}"
+        "                    {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' -}}"
+        "                {%- endif -%}"
+        "                {%- if tool_call.arguments is defined -%}"
+        "                    {%- for (args_name, args_value) in tool_call.arguments | items -%}"  # Arguments of the current call; the `items` filter requires Jinja2 >= 3.1
+        "                        {{- '<parameter=' + args_name + '>\n' -}}"
+        "                        {%- set args_value = args_value | tojson | safe if args_value is mapping or args_value is sequence and args_value is not string else args_value | string -%}"
+        "                        {{- args_value -}}"
+        "                        {{- '\n</parameter>\n' -}}"  # Trailing newline keeps each tag on its own line, matching the format shown in the tools system prompt
+        "                    {%- endfor -%}"
+        "                {%- endif -%}"
+        "                {{- '</function>\n</tool_call>' -}}"
+        "            {%- endfor -%}"
+        "        {%- endif -%}"
+        "        {{- '<|im_end|>\n' -}}"
+        "    {%- elif message.role == 'tool' -%}"  # Consecutive tool messages are merged into a single user turn of <tool_response> blocks
+        "        {%- if loop.previtem and loop.previtem.role != 'tool' -%}"
+        "            {{- '<|im_start|>user' -}}"
+        "        {%- endif -%}"
+        "        {{- '\n<tool_response>\n' -}}"
+        "        {{- content -}}"
+        "        {{- '\n</tool_response>' -}}"
+        "        {%- if not loop.last and loop.nextitem.role != 'tool' -%}"
+        "            {{- '<|im_end|>\n' -}}"
+        "        {%- elif loop.last -%}"
+        "            {{- '<|im_end|>\n' -}}"
+        "        {%- endif -%}"
+        "    {%- else -%}"
+        "        {{- raise_exception('Unexpected message role.') -}}"
+        "    {%- endif -%}"
+        "{%- endfor -%}"
+        "{%- if add_generation_prompt -%}"
+        "    {{- '<|im_start|>assistant\n' -}}"
+        "    {%- if enable_thinking is false -%}"
+        "        {{- '<think>\n\n</think>\n\n' -}}"
+        "    {%- else -%}"
+        "        {{- '<think>\n' -}}"
+        "    {%- endif -%}"
+        "{%- endif -%}"
+    )
+
+    def __init__(
+        self,
+        enable_thinking: bool = True,
+        add_vision_id: bool = True,
+        **kwargs,
+    ):
+        """
+        Parameters:
+        - enable_thinking (bool):
+            - True (default): The generation prompt opens a `<think>` block so the model reasons first.
+            - False: The generation prompt is pre-filled with an empty `<think></think>` block (no reasoning).
+        - add_vision_id (bool):
+            - True (default): Prefix every image with "Picture N: ". Recommended for multi-image.
+            - False: No numbering prefix is emitted. Can save tokens with single-image.
+        """
+        super().__init__(**kwargs)  # remaining keyword arguments are forwarded unchanged to MTMDChatHandler
+        self.enable_thinking = enable_thinking
+        self.extra_template_arguments["enable_thinking"] = enable_thinking  # read by CHAT_FORMAT as `enable_thinking`
+        self.extra_template_arguments["add_vision_id"] = add_vision_id  # read by CHAT_FORMAT as `add_vision_id`
+
+    def __call__(self, **kwargs):  # expects a 'llama' keyword argument; raises KeyError if it is missing
+        llama = kwargs['llama']
+
+        if hasattr(llama, 'input_ids'):
+            llama.input_ids.fill(0)  # zero the input_ids buffer before delegating to the parent handler
+
+        if self.verbose:
+            print(f"{self.log_prefix}(enable_thinking={self.enable_thinking}) - Start processing")
+
+        # Use parent implementation
+        return super().__call__(**kwargs)
 
 @register_chat_completion_handler("chatml-function-calling")
 def chatml_function_calling(