Update Submodule vendor/llama.cpp 9db77a0..45cac7c

JamePeng · JamePeng · commit 7a19575ec579 · 2026-04-18T05:50:24.000+08:00
diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
@@ -1014,7 +1014,7 @@ class llama_model_imatrix_data(ctypes.Structure):
 
     if TYPE_CHECKING:
         name: ctypes.c_char_p
-        data: ctypes.POINTER(ctypes.c_float)
+        data: ctypes.POINTER(ctypes.c_float) # type: ignore
         size: ctypes.c_size_t
 
 llama_model_imatrix_data_p = ctypes.POINTER(llama_model_imatrix_data)
@@ -1068,10 +1068,10 @@ class llama_model_quantize_params(ctypes.Structure):
         pure: bool
         keep_split: bool
         dry_run: bool
-        imatrix: ctypes.POINTER(llama_model_imatrix_data)
-        kv_overrides: ctypes.POINTER(llama_model_kv_override)
-        tensor_types: ctypes.POINTER(llama_model_tensor_override)
-        prune_layers: ctypes.POINTER(ctypes.c_int32)
+        imatrix: ctypes.POINTER(llama_model_imatrix_data) # type: ignore
+        kv_overrides: ctypes.POINTER(llama_model_kv_override) # type: ignore
+        tensor_types: ctypes.POINTER(llama_model_tensor_override) # type: ignore
+        prune_layers: ctypes.POINTER(ctypes.c_int32) # type: ignore
 
     _fields_ = [
         ("nthread", ctypes.c_int32),
@@ -1345,7 +1345,7 @@ def llama_numa_init(numa: int, /):
 )
 def llama_model_init_from_user(
     metadata: ctypes.c_void_p,
-    set_tensor_data: llama_model_set_tensor_data_t,
+    set_tensor_data: llama_model_set_tensor_data_t, # type: ignore
     set_tensor_data_ud: ctypes.c_void_p,
     params: llama_model_params,
     /
@@ -4548,7 +4548,7 @@ def llama_sampler_init_grammar_lazy_patterns(
     vocab: llama_vocab_p,
     grammar_str: bytes,
     grammar_root: bytes,
-    trigger_patterns: CtypesArray[bytes],
+    trigger_patterns: CtypesArray[bytes], # type: ignore
     num_trigger_patterns: int,
     trigger_tokens: CtypesArray[llama_token],
     num_trigger_tokens: int,
@@ -4803,8 +4803,8 @@ def llama_print_system_info() -> bytes:
     None,
 )
 def llama_log_get(
-    log_callback: Optional[ctypes.pointer(ggml_log_callback)],
-    user_data: ctypes.pointer(ctypes.c_void_p),
+    log_callback: Optional[ctypes.pointer(ggml_log_callback)], # type: ignore
+    user_data: ctypes.pointer(ctypes.c_void_p), # type: ignore
     /,
 ):
     """Get callback for all future logging events.
@@ -4819,7 +4819,7 @@ def llama_log_get(
     None,
 )
 def llama_log_set(
-    log_callback: Optional[ggml_log_callback],
+    log_callback: Optional[ggml_log_callback], # type: ignore
     user_data: ctypes.c_void_p,
     /,
 ):
diff --git a/llama_cpp/mtmd_cpp.py b/llama_cpp/mtmd_cpp.py
@@ -345,7 +345,7 @@ def mtmd_bitmap_init(
 )
 def mtmd_bitmap_init_from_audio(
     n_samples: c_uint,
-    data: POINTER(c_float),
+    data: POINTER(c_float), # type: ignore
     /,
 ) -> mtmd_bitmap_p:
     ...
@@ -582,6 +582,9 @@ class mtmd_decoder_pos(Structure):
         x: c_uint32
         y: c_uint32
 
+mtmd_decoder_pos_p = POINTER(mtmd_decoder_pos)  # typed pointer to mtmd_decoder_pos, used in Python-side annotations
+mtmd_decoder_pos_p_ctypes = c_void_p  # untyped pointer, listed in the ctypes_function_mtmd argument types
+
 # // get position for decoder attention, to be used by M-RoPE models
 # // i is the index of the embedding token, ranging from 0 to mtmd_image_tokens_get_n_tokens() - 1
 # // return relative position (for example, embedding 0 will have position (0, 0, 0);
@@ -633,7 +636,7 @@ def mtmd_tokenize(
     ctx: mtmd_context_p,
     output: mtmd_input_chunks_p,
     text: mtmd_input_text_p,
-    bitmaps: POINTER(mtmd_bitmap_p),
+    bitmaps: POINTER(mtmd_bitmap_p), # type: ignore
     n_bitmaps: c_uint,
     /,
 ) -> c_int32:
@@ -691,7 +694,7 @@ def mtmd_encode_chunk(
 # MTMD_API float * mtmd_get_output_embd(mtmd_context * ctx);
 @ctypes_function_mtmd(
     "mtmd_get_output_embd", [mtmd_context_p_ctypes], POINTER(c_float))
-def mtmd_get_output_embd(ctx: mtmd_context_p) -> POINTER(c_float):
+def mtmd_get_output_embd(ctx: mtmd_context_p) -> POINTER(c_float): # type: ignore
     """
     get output embeddings from the last encode pass
     """
@@ -703,7 +706,7 @@ def mtmd_get_output_embd(ctx: mtmd_context_p) -> POINTER(c_float):
 # MTMD_API void mtmd_log_set(ggml_log_callback log_callback, void * user_data);
 @ctypes_function_mtmd(
     "mtmd_log_set", [ggml_log_callback, c_void_p], None)
-def mtmd_log_set(log_callback: ggml_log_callback, user_data: c_void_p):
+def mtmd_log_set(log_callback: ggml_log_callback, user_data: c_void_p): # type: ignore
     """
     Set callback for all future logging events.
     """
@@ -735,7 +738,7 @@ def mtmd_test_create_input_chunks() -> mtmd_input_chunk_p:
 # MTMD_API void mtmd_helper_log_set(ggml_log_callback log_callback, void * user_data);
 @ctypes_function_mtmd(
     "mtmd_helper_log_set", [ggml_log_callback, c_void_p], None)
-def mtmd_helper_log_set(log_callback: ggml_log_callback, user_data: c_void_p):
+def mtmd_helper_log_set(log_callback: ggml_log_callback, user_data: c_void_p): # type: ignore
     """
     Set callback for all future logging events.
     """
@@ -810,6 +813,25 @@ def mtmd_helper_get_n_pos(chunks: mtmd_input_chunk_p) -> c_int32:
     ...
 
 
+# // helper to get the list of relative positions corresponding to the embedding tokens, to be used by M-RoPE
+# // out_pos must have length == mtmd_helper_get_n_tokens(image)
+# MTMD_API void mtmd_helper_image_get_decoder_pos(const mtmd_image_tokens * image, struct mtmd_decoder_pos * out_pos);
+@ctypes_function_mtmd(
+    "mtmd_helper_image_get_decoder_pos",
+    [mtmd_image_tokens_p_ctypes, mtmd_decoder_pos_p_ctypes],
+    None,  # C return type is void
+)
+def mtmd_helper_image_get_decoder_pos(
+    image: mtmd_image_tokens_p,
+    out_pos: mtmd_decoder_pos_p,  # type: ignore
+) -> None:
+    """
+    Fill out_pos with the relative decoder positions of the image's embedding tokens (for M-RoPE).
+    out_pos must have length == mtmd_helper_get_n_tokens(image); the C function returns nothing.
+    """
+    ...
+
+
 # // helper function that automatically:
 # // 1. run llama_decode() on text chunks
 # // 2. run mtmd_encode() on image chunks, then mtmd_get_output_embd() and then llama_decode()
@@ -844,7 +866,7 @@ def mtmd_helper_eval_chunks(
     seq_id: c_int32,
     n_batch: c_int32,
     logits_last: c_bool,
-    new_n_past: POINTER(c_int32),
+    new_n_past: POINTER(c_int32), # type: ignore
     /,
 ) -> c_int32:
     """
@@ -887,7 +909,7 @@ def mtmd_helper_eval_chunk_single(
     seq_id: c_int32,
     n_batch: c_int32,
     logits_last: c_bool,
-    new_n_past: POINTER(c_int32),
+    new_n_past: POINTER(c_int32), # type: ignore
     /,
 ) -> c_int32:
     """
@@ -923,7 +945,7 @@ def mtmd_helper_decode_image_chunk(
     ctx: mtmd_context_p,
     lctx: llama_cpp.llama_context_p,
     chunks: mtmd_input_chunk_p,
-    encoded_embd: POINTER(c_float),
+    encoded_embd: POINTER(c_float), # type: ignore
     n_past: c_int32,
     seq_id: c_int32,
     n_batch: c_int32,
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 9db77a020c97ac3b13b7c1bf4e0c5787001533e7
+Subproject commit 45cac7ca703fb9085eae62b9121fca01d20177f6