Skip to content

Commit d7478de

Browse files
committed
Relocate LFM25VLChatHandler, add comment details, and update README.md
1 parent 9e749f9 commit d7478de

2 files changed

Lines changed: 98 additions & 91 deletions

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,7 @@ Below are the supported multi-modal models and their respective chat handlers (P
740740
| [glm4.6v](https://huggingface.co/unsloth/GLM-4.6V-Flash-GGUF) | `GLM46VChatHandler` | `glm4.6v` |
741741
| [granite-docling](https://huggingface.co/ibm-granite/granite-docling-258M-GGUF) | `GraniteDoclingChatHandler` | `granite-docling` |
742742
| [lfm2-vl](https://huggingface.co/LiquidAI/LFM2-VL-3B-GGUF) | `LFM2VLChatHandler` | `lfm2-vl` |
743+
| [lfm2.5-vl](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B-GGUF) | `LFM25VLChatHandler` | `lfm2.5-vl` |
743744
| [paddleocr-vl-1.5](https://huggingface.co/JamePeng2023/PaddleOCR-VL-1.5-GGUF) | `PaddleOCRChatHandler` | `paddleocr` |
744745
| [qwen2.5-vl](https://huggingface.co/unsloth/Qwen2.5-VL-3B-Instruct-GGUF) | `Qwen25VLChatHandler` | `qwen2.5-vl` |
745746
| [qwen3-vl](https://huggingface.co/unsloth/Qwen3-VL-8B-Thinking-GGUF) | `Qwen3VLChatHandler` | `qwen3-vl` |

llama_cpp/llama_chat_format.py

Lines changed: 97 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -4944,6 +4944,103 @@ def __call__(self, **kwargs):
49444944
return super().__call__(**kwargs)
49454945

49464946

4947+
class LFM25VLChatHandler(MTMDChatHandler):
    """
    Chat handler for LiquidAI LFM2.5-VL multimodal (vision-language) models.

    Builds a ChatML-style prompt via the Jinja2 ``CHAT_FORMAT`` template,
    injects image content MTMD-style, and optionally strips ``<think>``
    reasoning from all but the most recent assistant turn.

    Note(JamePeng): The suggestion is to compress the input image to 512x512 pixels to achieve native resolution processing.
    """

    # Special tokens, aligned with the LFM2.5-VL tokenizer_config.
    LFM25VL_BOS_TOKEN = "<|startoftext|>"
    LFM25VL_EOS_TOKEN = "<|im_end|>"
    LFM25VL_PAD_TOKEN = "<|pad|>"

    # Image-specific tokens.
    LFM25VL_IMAGE_TOKEN = "<image>"
    LFM25VL_IMAGE_START_TOKEN = "<|image_start|>"
    LFM25VL_IMAGE_END_TOKEN = "<|image_end|>"
    LFM25VL_IMAGE_THUMBNAIL = "<|img_thumbnail|>"

    # Jinja2 chat template: system prompt (optionally extended with a JSON
    # tool list), then each message wrapped in <|im_start|>role ... <|im_end|>.
    # Past assistant <think> blocks are dropped unless keep_past_thinking.
    CHAT_FORMAT = (
        "{{- bos_token -}}\n"
        "{%- set keep_past_thinking = keep_past_thinking | default(false) -%}\n"
        "{%- set ns = namespace(system_prompt='', content='') -%}\n"
        "{%- if messages[0]['role'] == 'system' -%}\n"
        "    {%- set ns.system_prompt = messages[0]['content'] -%}\n"
        "    {%- set messages = messages[1:] -%}\n"
        "{%- endif -%}\n"
        "{%- if tools -%}\n"
        "    {%- set ns.system_prompt = ns.system_prompt + ('\\n' if ns.system_prompt else '') + 'List of tools: [' -%}\n"
        "    {%- for tool in tools -%}\n"
        "        {%- if tool is not string -%}\n"
        "            {%- set tool = tool | tojson -%}\n"
        "        {%- endif -%}\n"
        "        {%- set ns.system_prompt = ns.system_prompt + tool -%}\n"
        "        {%- if not loop.last -%}\n"
        "            {%- set ns.system_prompt = ns.system_prompt + ', ' -%}\n"
        "        {%- endif -%}\n"
        "    {%- endfor -%}\n"
        "    {%- set ns.system_prompt = ns.system_prompt + ']' -%}\n"
        "{%- endif -%}\n"
        "{%- if ns.system_prompt -%}\n"
        "    {{- '<|im_start|>system\\n' + ns.system_prompt + '<|im_end|>\\n' -}}\n"
        "{%- endif -%}\n"
        "{%- set ns.last_assistant_index = -1 -%}\n"
        "{%- for message in messages -%}\n"
        "    {%- if message['role'] == 'assistant' -%}\n"
        "        {%- set ns.last_assistant_index = loop.index0 -%}\n"
        "    {%- endif -%}\n"
        "{%- endfor -%}\n"
        "{%- for message in messages -%}\n"
        "    {{- '<|im_start|>' + message['role'] + '\\n' -}}\n"
        "    {%- set content = message['content'] -%}\n"
        "    {%- if content is not string -%}\n"
        "        {%- set ns.content = '' -%}\n"
        "        {#- MTMD-style Multimodal Injection (Audio stripped for VL model) -#}\n"
        "        {%- for item in content -%}\n"
        "            {%- if item['type'] == 'image_url' -%}\n"
        "                {%- set img_val = item['image_url'] if item['image_url'] is string else item['image_url']['url'] -%}\n"
        "                {%- set ns.content = ns.content + img_val -%}\n"
        "            {%- elif item['type'] == 'text' -%}\n"
        "                {%- set ns.content = ns.content + item['text'] -%}\n"
        "            {%- else -%}\n"
        "                {%- set ns.content = ns.content + (item | tojson) -%}\n"
        "            {%- endif -%}\n"
        "        {%- endfor -%}\n"
        "        {%- set content = ns.content -%}\n"
        "    {%- endif -%}\n"
        "    {%- if message['role'] == 'assistant' and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}\n"
        "        {%- if '</think>' in content -%}\n"
        "            {%- set content = content.split('</think>')[-1] | trim -%}\n"
        "        {%- endif -%}\n"
        "    {%- endif -%}\n"
        "    {{- content + '<|im_end|>\\n' -}}\n"
        "{%- endfor -%}\n"
        "{%- if add_generation_prompt -%}\n"
        "    {{- '<|im_start|>assistant\\n' -}}\n"
        "{%- endif -%}\n"
    )

    def __init__(self, keep_past_thinking: bool = False, **kwargs):
        """
        Args:
            keep_past_thinking: When True, ``<think>`` blocks in earlier
                assistant turns are kept in the prompt; otherwise only the
                final assistant turn retains its reasoning.
            **kwargs: Forwarded unchanged to ``MTMDChatHandler.__init__``.
        """
        self.keep_past_thinking = keep_past_thinking
        super().__init__(**kwargs)

    def __call__(self, **kwargs):
        # Guard against oversized image token budgets; -1 presumably means
        # "fall back to the model default" — TODO confirm against MTMDChatHandler.
        if self.image_min_tokens > 256:
            if self.verbose:
                print(f"{self.log_prefix}: For LFM2.5-VL, using values higher than 256 for `image_min_tokens` could cause errors. Please reset it to between 64 and 256.")
            self.image_min_tokens = -1

        # Expose the thinking-retention flag to the Jinja2 template.
        self.extra_template_arguments["keep_past_thinking"] = self.keep_past_thinking

        # Always stop generation at the LFM2.5-VL end-of-turn token.
        kwargs['stop'] = [self.LFM25VL_EOS_TOKEN]

        if self.verbose:
            print(f"{self.log_prefix}(keep_past_thinking={self.keep_past_thinking}) - Start processing")
        return super().__call__(**kwargs)
49475044
class PaddleOCRChatHandler(MTMDChatHandler):
49485045
"""
49495046
Handler for PaddleOCR 1.5 multimodal models.
@@ -5413,97 +5510,6 @@ def __call__(self, **kwargs):
54135510
# Use parent implementation
54145511
return super().__call__(**kwargs)
54155512

5416-
class LFM25VLChatHandler(MTMDChatHandler):
    """
    Chat handler for LiquidAI LFM2.5-VL multimodal models.

    Renders a ChatML-style prompt from ``CHAT_FORMAT``, merging image and
    text content items into each message, and clamps ``image_min_tokens``
    to 256 before delegating to ``MTMDChatHandler``.
    """

    # Special tokens, aligned with the LFM2.5-VL tokenizer_config.
    LFM25VL_BOS_TOKEN = "<|startoftext|>"
    LFM25VL_EOS_TOKEN = "<|im_end|>"
    LFM25VL_PAD_TOKEN = "<|pad|>"

    # Image-specific tokens.
    LFM25VL_IMAGE_TOKEN = "<image>"
    LFM25VL_IMAGE_START_TOKEN = "<|image_start|>"
    LFM25VL_IMAGE_END_TOKEN = "<|image_end|>"
    LFM25VL_IMAGE_THUMBNAIL = "<|img_thumbnail|>"

    # Jinja2 chat template. Tools are serialized into the system prompt as a
    # JSON list; past assistant <think> blocks are stripped unless
    # keep_past_thinking is set.
    CHAT_FORMAT = (
        "{{- bos_token -}}\n"
        "{%- set keep_past_thinking = keep_past_thinking | default(false) -%}\n"
        "{%- set ns = namespace(system_prompt='', content='') -%}\n"
        "{%- if messages[0]['role'] == 'system' -%}\n"
        "    {%- set ns.system_prompt = messages[0]['content'] -%}\n"
        "    {%- set messages = messages[1:] -%}\n"
        "{%- endif -%}\n"
        "{%- if tools -%}\n"
        "    {%- set ns.system_prompt = ns.system_prompt + ('\\n' if ns.system_prompt else '') + 'List of tools: [' -%}\n"
        "    {%- for tool in tools -%}\n"
        "        {%- if tool is not string -%}\n"
        "            {%- set tool = tool | tojson -%}\n"
        "        {%- endif -%}\n"
        "        {%- set ns.system_prompt = ns.system_prompt + tool -%}\n"
        "        {%- if not loop.last -%}\n"
        "            {%- set ns.system_prompt = ns.system_prompt + ', ' -%}\n"
        "        {%- endif -%}\n"
        "    {%- endfor -%}\n"
        "    {%- set ns.system_prompt = ns.system_prompt + ']' -%}\n"
        "{%- endif -%}\n"
        "{%- if ns.system_prompt -%}\n"
        "    {{- '<|im_start|>system\\n' + ns.system_prompt + '<|im_end|>\\n' -}}\n"
        "{%- endif -%}\n"
        "{%- set ns.last_assistant_index = -1 -%}\n"
        "{%- for message in messages -%}\n"
        "    {%- if message['role'] == 'assistant' -%}\n"
        "        {%- set ns.last_assistant_index = loop.index0 -%}\n"
        "    {%- endif -%}\n"
        "{%- endfor -%}\n"
        "{%- for message in messages -%}\n"
        "    {{- '<|im_start|>' + message['role'] + '\\n' -}}\n"
        "    {%- set content = message['content'] -%}\n"
        "    {%- if content is not string -%}\n"
        "        {%- set ns.content = '' -%}\n"
        "        {#- MTMD-style Multimodal Injection (Audio stripped for VL model) -#}\n"
        "        {%- for item in content -%}\n"
        "            {%- if item['type'] == 'image_url' -%}\n"
        "                {%- set img_val = item['image_url'] if item['image_url'] is string else item['image_url']['url'] -%}\n"
        "                {%- set ns.content = ns.content + img_val -%}\n"
        "            {%- elif item['type'] == 'text' -%}\n"
        "                {%- set ns.content = ns.content + item['text'] -%}\n"
        "            {%- else -%}\n"
        "                {%- set ns.content = ns.content + (item | tojson) -%}\n"
        "            {%- endif -%}\n"
        "        {%- endfor -%}\n"
        "        {%- set content = ns.content -%}\n"
        "    {%- endif -%}\n"
        "    {%- if message['role'] == 'assistant' and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}\n"
        "        {%- if '</think>' in content -%}\n"
        "            {%- set content = content.split('</think>')[-1] | trim -%}\n"
        "        {%- endif -%}\n"
        "    {%- endif -%}\n"
        "    {{- content + '<|im_end|>\\n' -}}\n"
        "{%- endfor -%}\n"
        "{%- if add_generation_prompt -%}\n"
        "    {{- '<|im_start|>assistant\\n' -}}\n"
        "{%- endif -%}\n"
    )

    def __init__(self, keep_past_thinking: bool = False, **kwargs):
        """
        Args:
            keep_past_thinking: When True, keep ``<think>`` blocks of earlier
                assistant turns in the prompt instead of stripping them.
            **kwargs: Forwarded unchanged to ``MTMDChatHandler.__init__``.
        """
        self.keep_past_thinking = keep_past_thinking
        super().__init__(**kwargs)

    def __call__(self, **kwargs):
        # Clamp the image token budget; values above 256 could cause errors
        # for LFM2.5-VL.
        if self.image_min_tokens > 256:
            if self.verbose:
                print("For LFM2.5-VL, using values higher than 256 for `image_min_tokens` could cause errors. Setting to **256**.")

            self.image_min_tokens = 256

        # Expose the thinking-retention flag to the Jinja2 template.
        self.extra_template_arguments["keep_past_thinking"] = self.keep_past_thinking

        # Always stop generation at the LFM2.5-VL end-of-turn token.
        kwargs['stop'] = [self.LFM25VL_EOS_TOKEN]

        if self.verbose:
            print(f"{self.log_prefix}(keep_past_thinking={self.keep_past_thinking}) - Start processing")
        return super().__call__(**kwargs)
55075513

55085514
@register_chat_completion_handler("chatml-function-calling")
55095515
def chatml_function_calling(

0 commit comments

Comments
 (0)