@@ -4944,6 +4944,103 @@ def __call__(self, **kwargs):
49444944 return super ().__call__ (** kwargs )
49454945
49464946
class LFM25VLChatHandler(MTMDChatHandler):
    """
    Chat handler for LFM2.5-VL multimodal (vision-language) models.

    The handler renders conversations with a ChatML-style template
    (``<|im_start|>role ... <|im_end|>``), folds an optional tool list into
    the system prompt, and flattens multimodal message content (image URLs
    and text parts) into a single string before delegating to the parent
    handler.

    Note(JamePeng): It is recommended to downscale input images to 512x512
    pixels so that they are processed at the model's native resolution.
    """

    # Aligned with LFM2.5-VL tokenizer_config
    LFM25VL_BOS_TOKEN = "<|startoftext|>"
    LFM25VL_EOS_TOKEN = "<|im_end|>"
    LFM25VL_PAD_TOKEN = "<|pad|>"

    # Image specific tokens
    LFM25VL_IMAGE_TOKEN = "<image>"
    LFM25VL_IMAGE_START_TOKEN = "<|image_start|>"
    LFM25VL_IMAGE_END_TOKEN = "<|image_end|>"
    LFM25VL_IMAGE_THUMBNAIL = "<|img_thumbnail|>"

    # Jinja chat template. Every tag uses the `-` whitespace-control marker
    # on both sides, so the newlines/indentation between tags below are purely
    # cosmetic and never reach the rendered prompt. The only newlines emitted
    # are the explicit '\\n' escapes inside the Jinja string literals.
    CHAT_FORMAT = (
        "{{- bos_token -}}\n"
        "{%- set keep_past_thinking = keep_past_thinking | default(false) -%}\n"
        "{%- set ns = namespace(system_prompt='', content='') -%}\n"
        # A leading system message is lifted out of the message list.
        "{%- if messages[0]['role'] == 'system' -%}\n"
        "    {%- set ns.system_prompt = messages[0]['content'] -%}\n"
        "    {%- set messages = messages[1:] -%}\n"
        "{%- endif -%}\n"
        # Tools (JSON-encoded unless already strings) are appended to the system prompt.
        "{%- if tools -%}\n"
        "    {%- set ns.system_prompt = ns.system_prompt + ('\\n' if ns.system_prompt else '') + 'List of tools: [' -%}\n"
        "    {%- for tool in tools -%}\n"
        "        {%- if tool is not string -%}\n"
        "            {%- set tool = tool | tojson -%}\n"
        "        {%- endif -%}\n"
        "        {%- set ns.system_prompt = ns.system_prompt + tool -%}\n"
        "        {%- if not loop.last -%}\n"
        "            {%- set ns.system_prompt = ns.system_prompt + ', ' -%}\n"
        "        {%- endif -%}\n"
        "    {%- endfor -%}\n"
        "    {%- set ns.system_prompt = ns.system_prompt + ']' -%}\n"
        "{%- endif -%}\n"
        "{%- if ns.system_prompt -%}\n"
        "    {{- '<|im_start|>system\\n' + ns.system_prompt + '<|im_end|>\\n' -}}\n"
        "{%- endif -%}\n"
        # First pass: remember the index of the last assistant turn so that
        # only earlier assistant turns get their thinking section stripped.
        "{%- set ns.last_assistant_index = -1 -%}\n"
        "{%- for message in messages -%}\n"
        "    {%- if message['role'] == 'assistant' -%}\n"
        "        {%- set ns.last_assistant_index = loop.index0 -%}\n"
        "    {%- endif -%}\n"
        "{%- endfor -%}\n"
        # Second pass: render every message.
        "{%- for message in messages -%}\n"
        "    {{- '<|im_start|>' + message['role'] + '\\n' -}}\n"
        "    {%- set content = message['content'] -%}\n"
        "    {%- if content is not string -%}\n"
        "        {%- set ns.content = '' -%}\n"
        "        {#- MTMD-style Multimodal Injection (Audio stripped for VL model) -#}\n"
        "        {%- for item in content -%}\n"
        "            {%- if item['type'] == 'image_url' -%}\n"
        "                {%- set img_val = item['image_url'] if item['image_url'] is string else item['image_url']['url'] -%}\n"
        "                {%- set ns.content = ns.content + img_val -%}\n"
        "            {%- elif item['type'] == 'text' -%}\n"
        "                {%- set ns.content = ns.content + item['text'] -%}\n"
        "            {%- else -%}\n"
        "                {%- set ns.content = ns.content + (item | tojson) -%}\n"
        "            {%- endif -%}\n"
        "        {%- endfor -%}\n"
        "        {%- set content = ns.content -%}\n"
        "    {%- endif -%}\n"
        "    {%- if message['role'] == 'assistant' and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}\n"
        "        {%- if '</think>' in content -%}\n"
        "            {%- set content = content.split('</think>')[-1] | trim -%}\n"
        "        {%- endif -%}\n"
        "    {%- endif -%}\n"
        "    {{- content + '<|im_end|>\\n' -}}\n"
        "{%- endfor -%}\n"
        "{%- if add_generation_prompt -%}\n"
        "    {{- '<|im_start|>assistant\\n' -}}\n"
        "{%- endif -%}\n"
    )

    def __init__(self, keep_past_thinking: bool = False, **kwargs) -> None:
        """
        Create the handler.

        Args:
            keep_past_thinking: When False (default), the template drops
                everything up to and including the last ``</think>`` from
                every assistant turn except the most recent one.
            **kwargs: Forwarded unchanged to ``MTMDChatHandler.__init__``.
        """
        self.keep_past_thinking = keep_past_thinking
        super().__init__(**kwargs)

    def __call__(self, **kwargs):
        """
        Prepare LFM2.5-VL specific settings and delegate to the parent handler.

        Side effects, in order:
          * ``image_min_tokens`` values above 256 are reset to ``-1``
            (a warning is printed only when ``verbose`` is set).
          * ``keep_past_thinking`` is published to the chat template through
            ``extra_template_arguments``.
          * ``kwargs['stop']`` is replaced by the model's EOS token; any stop
            sequence supplied by the caller is overwritten.

        Returns:
            Whatever ``MTMDChatHandler.__call__`` returns for ``kwargs``.
        """
        if self.image_min_tokens > 256:
            if self.verbose:
                print(f"{self.log_prefix}: For LFM2.5-VL, using values higher than 256 for `image_min_tokens` could cause errors. Please reset it to between 64 and 256.")
            # NOTE(review): -1 is presumably the parent's "unset / use default"
            # sentinel - confirm against MTMDChatHandler.
            self.image_min_tokens = -1

        self.extra_template_arguments["keep_past_thinking"] = self.keep_past_thinking

        kwargs['stop'] = [self.LFM25VL_EOS_TOKEN]

        if self.verbose:
            print(f"{self.log_prefix}(keep_past_thinking={self.keep_past_thinking}) - Start processing")
        return super().__call__(**kwargs)
5042+
5043+
49475044class PaddleOCRChatHandler (MTMDChatHandler ):
49485045 """
49495046 Handler for PaddleOCR 1.5 multimodal models.
@@ -5413,97 +5510,6 @@ def __call__(self, **kwargs):
54135510 # Use parent implementation
54145511 return super ().__call__ (** kwargs )
54155512
class LFM25VLChatHandler(MTMDChatHandler):
    """
    Chat handler for LFM2.5-VL multimodal (vision-language) models.

    Conversations are rendered with a ChatML-style template
    (``<|im_start|>role ... <|im_end|>``). An optional tool list is appended
    to the system prompt, and list-valued message content (image URLs and
    text parts) is concatenated into one string before the parent handler
    takes over.
    """

    # Aligned with LFM2.5-VL tokenizer_config
    LFM25VL_BOS_TOKEN = "<|startoftext|>"
    LFM25VL_EOS_TOKEN = "<|im_end|>"
    LFM25VL_PAD_TOKEN = "<|pad|>"

    # Image specific tokens
    LFM25VL_IMAGE_TOKEN = "<image>"
    LFM25VL_IMAGE_START_TOKEN = "<|image_start|>"
    LFM25VL_IMAGE_END_TOKEN = "<|image_end|>"
    LFM25VL_IMAGE_THUMBNAIL = "<|img_thumbnail|>"

    # Jinja chat template. All tags carry `-` whitespace control on both
    # sides, so the layout whitespace between tags is not rendered; only the
    # explicit '\\n' escapes inside Jinja string literals produce newlines.
    CHAT_FORMAT = (
        "{{- bos_token -}}\n"
        "{%- set keep_past_thinking = keep_past_thinking | default(false) -%}\n"
        "{%- set ns = namespace(system_prompt='', content='') -%}\n"
        # Pull a leading system message out of the list.
        "{%- if messages[0]['role'] == 'system' -%}\n"
        "    {%- set ns.system_prompt = messages[0]['content'] -%}\n"
        "    {%- set messages = messages[1:] -%}\n"
        "{%- endif -%}\n"
        # Append the tool list to the system prompt.
        "{%- if tools -%}\n"
        "    {%- set ns.system_prompt = ns.system_prompt + ('\\n' if ns.system_prompt else '') + 'List of tools: [' -%}\n"
        "    {%- for tool in tools -%}\n"
        "        {%- if tool is not string -%}\n"
        "            {%- set tool = tool | tojson -%}\n"
        "        {%- endif -%}\n"
        "        {%- set ns.system_prompt = ns.system_prompt + tool -%}\n"
        "        {%- if not loop.last -%}\n"
        "            {%- set ns.system_prompt = ns.system_prompt + ', ' -%}\n"
        "        {%- endif -%}\n"
        "    {%- endfor -%}\n"
        "    {%- set ns.system_prompt = ns.system_prompt + ']' -%}\n"
        "{%- endif -%}\n"
        "{%- if ns.system_prompt -%}\n"
        "    {{- '<|im_start|>system\\n' + ns.system_prompt + '<|im_end|>\\n' -}}\n"
        "{%- endif -%}\n"
        # Locate the last assistant turn; earlier ones may lose their thinking.
        "{%- set ns.last_assistant_index = -1 -%}\n"
        "{%- for message in messages -%}\n"
        "    {%- if message['role'] == 'assistant' -%}\n"
        "        {%- set ns.last_assistant_index = loop.index0 -%}\n"
        "    {%- endif -%}\n"
        "{%- endfor -%}\n"
        # Render each message.
        "{%- for message in messages -%}\n"
        "    {{- '<|im_start|>' + message['role'] + '\\n' -}}\n"
        "    {%- set content = message['content'] -%}\n"
        "    {%- if content is not string -%}\n"
        "        {%- set ns.content = '' -%}\n"
        "        {#- MTMD-style Multimodal Injection (Audio stripped for VL model) -#}\n"
        "        {%- for item in content -%}\n"
        "            {%- if item['type'] == 'image_url' -%}\n"
        "                {%- set img_val = item['image_url'] if item['image_url'] is string else item['image_url']['url'] -%}\n"
        "                {%- set ns.content = ns.content + img_val -%}\n"
        "            {%- elif item['type'] == 'text' -%}\n"
        "                {%- set ns.content = ns.content + item['text'] -%}\n"
        "            {%- else -%}\n"
        "                {%- set ns.content = ns.content + (item | tojson) -%}\n"
        "            {%- endif -%}\n"
        "        {%- endfor -%}\n"
        "        {%- set content = ns.content -%}\n"
        "    {%- endif -%}\n"
        "    {%- if message['role'] == 'assistant' and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}\n"
        "        {%- if '</think>' in content -%}\n"
        "            {%- set content = content.split('</think>')[-1] | trim -%}\n"
        "        {%- endif -%}\n"
        "    {%- endif -%}\n"
        "    {{- content + '<|im_end|>\\n' -}}\n"
        "{%- endfor -%}\n"
        "{%- if add_generation_prompt -%}\n"
        "    {{- '<|im_start|>assistant\\n' -}}\n"
        "{%- endif -%}\n"
    )

    def __init__(self, keep_past_thinking: bool = False, **kwargs) -> None:
        """
        Create the handler.

        Args:
            keep_past_thinking: When False (default), the template removes
                the text up to and including the last ``</think>`` from all
                assistant turns except the final one.
            **kwargs: Passed through to ``MTMDChatHandler.__init__``.
        """
        self.keep_past_thinking = keep_past_thinking
        super().__init__(**kwargs)

    def __call__(self, **kwargs):
        """
        Apply LFM2.5-VL specific settings, then run the parent handler.

        ``image_min_tokens`` is capped at 256 (a notice is printed only when
        ``verbose`` is set), ``keep_past_thinking`` is exposed to the template
        via ``extra_template_arguments``, and ``kwargs['stop']`` is replaced
        by the model's EOS token, overwriting any caller-supplied value.

        Returns:
            Whatever ``MTMDChatHandler.__call__`` returns for ``kwargs``.
        """
        if self.image_min_tokens > 256:
            if self.verbose:
                # Plain literal: the message has no placeholders, so no f-prefix.
                print("For LFM2.5-VL, using values higher than 256 for `image_min_tokens` could cause errors. Setting to **256**.")

            self.image_min_tokens = 256

        self.extra_template_arguments["keep_past_thinking"] = self.keep_past_thinking

        kwargs['stop'] = [self.LFM25VL_EOS_TOKEN]

        if self.verbose:
            print(f"{self.log_prefix}(keep_past_thinking={self.keep_past_thinking}) - Start processing")
        return super().__call__(**kwargs)
55075513
55085514@register_chat_completion_handler ("chatml-function-calling" )
55095515def chatml_function_calling (
0 commit comments