From 7d78e7575947505de978cb39e76bc223f1ffeb3f Mon Sep 17 00:00:00 2001 From: abetlen Date: Sat, 13 Jun 2026 09:59:48 -0700 Subject: [PATCH] feat: update llama.cpp to ggml-org/llama.cpp@f05cf4676 --- CHANGELOG.md | 2 +- llama_cpp/mtmd_cpp.py | 56 ++++++++++++++++++++++++++++++++++++++++++- vendor/llama.cpp | 2 +- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46c57c5d9..084865cd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - feat(example): support server video inputs and Gemma text tool calls by @abetlen in #2291 -- feat: update llama.cpp to ggml-org/llama.cpp@3e7bd4f39 +- feat: update llama.cpp to ggml-org/llama.cpp@f05cf4676 - fix(example): support multi-step Responses tool streaming by @abetlen in #2288 - fix(ci): Repair Linux accelerator wheels for manylinux publishing diff --git a/llama_cpp/mtmd_cpp.py b/llama_cpp/mtmd_cpp.py index 919cefb35..46eb2c879 100644 --- a/llama_cpp/mtmd_cpp.py +++ b/llama_cpp/mtmd_cpp.py @@ -76,6 +76,9 @@ mtmd_input_chunks_p = NewType("mtmd_input_chunks_p", int) mtmd_input_chunks_p_ctypes = c_void_p +mtmd_batch_p = NewType("mtmd_batch_p", int) +mtmd_batch_p_ctypes = c_void_p + # Enums MTMD_INPUT_CHUNK_TYPE_TEXT = 0 MTMD_INPUT_CHUNK_TYPE_IMAGE = 1 @@ -102,6 +105,7 @@ class mtmd_context_params(Structure): image_max_tokens: int cb_eval: llama_cpp.ggml_backend_sched_eval_callback cb_eval_user_data: c_void_p + batch_max_tokens: int _fields_ = [ ("use_gpu", c_bool), @@ -115,6 +119,7 @@ class mtmd_context_params(Structure): ("image_max_tokens", c_int), ("cb_eval", llama_cpp.ggml_backend_sched_eval_callback), ("cb_eval_user_data", c_void_p), + ("batch_max_tokens", c_int), ] @@ -596,7 +601,7 @@ def mtmd_image_tokens_get_decoder_pos( c_int, ) def mtmd_encode(ctx: mtmd_context_p, image_tokens: mtmd_image_tokens_p, /) -> int: - """Run an MTMD encode pass for image tokens.""" + """Run a deprecated MTMD encode pass for image tokens.""" ... @@ -618,6 +623,55 @@ def mtmd_get_output_embd(ctx: mtmd_context_p, /) -> Optional[CtypesArray[c_float ... +# MTMD_API mtmd_batch * mtmd_batch_init(mtmd_context * ctx); +@ctypes_function("mtmd_batch_init", [mtmd_context_p_ctypes], mtmd_batch_p_ctypes) +def mtmd_batch_init(ctx: mtmd_context_p, /) -> Optional[mtmd_batch_p]: + """Initialize an MTMD media chunk batch for a context.""" + ... + + +# MTMD_API void mtmd_batch_free(mtmd_batch * batch); +@ctypes_function("mtmd_batch_free", [mtmd_batch_p_ctypes], None) +def mtmd_batch_free(batch: mtmd_batch_p, /): ... + + +# MTMD_API int32_t mtmd_batch_add_chunk(mtmd_batch * batch, const mtmd_input_chunk * chunk); +@ctypes_function( + "mtmd_batch_add_chunk", + [mtmd_batch_p_ctypes, mtmd_input_chunk_p_ctypes], + c_int, +) +def mtmd_batch_add_chunk( + batch: mtmd_batch_p, + chunk: mtmd_input_chunk_p, + /, +) -> int: + """Add a media chunk to an MTMD batch.""" + ... + + +# MTMD_API int32_t mtmd_batch_encode(mtmd_batch * batch); +@ctypes_function("mtmd_batch_encode", [mtmd_batch_p_ctypes], c_int) +def mtmd_batch_encode(batch: mtmd_batch_p, /) -> int: + """Run an MTMD encode pass for all chunks in a batch.""" + ... + + +# MTMD_API float * mtmd_batch_get_output_embd(mtmd_batch * batch, const mtmd_input_chunk * chunk); +@ctypes_function( + "mtmd_batch_get_output_embd", + [mtmd_batch_p_ctypes, mtmd_input_chunk_p_ctypes], + POINTER(c_float), +) +def mtmd_batch_get_output_embd( + batch: mtmd_batch_p, + chunk: mtmd_input_chunk_p, + /, +) -> Optional[CtypesArray[c_float]]: + """Get output embeddings for a chunk from the last batch encode pass.""" + ... + + # MTMD_API struct mtmd_caps mtmd_get_cap_from_file(const char * mmproj_fname); @ctypes_function("mtmd_get_cap_from_file", [c_char_p], mtmd_caps) def mtmd_get_cap_from_file(mmproj_fname: bytes, /) -> mtmd_caps: diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 3e7bd4f39..f05cf4676 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 3e7bd4f39ac59167f82103e1fc22dc4585c489d3 +Subproject commit f05cf4676af46c2f017c0e6ba25b6e20204f700e