@@ -12,6 +12,7 @@ namespace LLama.Native
1212 /// </summary>
1313 public sealed class SafeMtmdModelHandle : SafeLLamaHandleBase
1414 {
15+ // Pending media embeddings queued for the next call to Tokenize.
1516 private readonly List < SafeMtmdEmbed > _pendingMedia = new ( ) ;
1617
1718 /// <inheritdoc />
@@ -23,13 +24,14 @@ protected override bool ReleaseHandle()
2324 }
2425
2526 /// <summary>
26- /// Load a model from the given file path into memory
27+ /// Load a multimodal projection model from disk and bind it to the supplied text model.
2728 /// </summary>
28- /// <param name="modelPath">MMP File (Multi-Modal Projections)</param>
29- /// <param name="verbosity">Verbosity level</param>
30- /// <returns>SafeHandle of the Clip Model</returns>
31- /// <exception cref="InvalidOperationException"></exception>
32- /// <exception cref="LoadWeightsFailedException"></exception>
29+ /// <param name="modelPath">Path to the MMP (Multi-Modal Projections) file.</param>
30+ /// <param name="textModel">Text model that provides tokenizer weights for the multimodal helper.</param>
31+ /// <param name="mtmdCtxParams">Optional context parameters; defaults are used when <c>null</c>.</param>
32+ /// <returns>Safe handle for the MTMD model.</returns>
33+ /// <exception cref="InvalidOperationException">The file exists but is not readable by the current process.</exception>
34+ /// <exception cref="LoadWeightsFailedException">The native loader failed to initialize the MTMD model.</exception>
3335 public static SafeMtmdModelHandle LoadFromFile ( string modelPath , LLamaWeights textModel , MtmdContextParams mtmdCtxParams )
3436 {
3537 // Try to open the model file, this will check:
@@ -64,29 +66,41 @@ public static SafeMtmdModelHandle LoadFromFile(string modelPath, LLamaWeights te
6466 }
6567
/// <summary>
/// Load media from disk and queue it for the next tokenize call.
/// </summary>
/// <param name="path">Absolute or relative path to the media asset.</param>
/// <returns>Safe handle to the media embedding.</returns>
/// <exception cref="ObjectDisposedException">The model handle has been disposed.</exception>
/// <exception cref="ArgumentNullException"><paramref name="path"/> is <c>null</c>.</exception>
/// <exception cref="RuntimeError">The native loader failed to ingest the file.</exception>
public SafeMtmdEmbed LoadMediaFromFile(string path)
{
    EnsureNotDisposed();

    // Reject a null path up front instead of handing it to the loader.
    if (path == null)
        throw new ArgumentNullException(nameof(path));

    // A null result from the loader is reported as a RuntimeError naming the offending path.
    var embed = SafeMtmdEmbed.FromMediaFile(this, path)
        ?? throw new RuntimeError($"Failed to load media '{path}'.");

    // Track the embedding so Tokenize can pick it up from the pending list.
    _pendingMedia.Add(embed);
    return embed;
}
7684
/// <summary>
/// Create a media embedding from an in-memory buffer and add it to the pending list
/// consumed by the next tokenize call.
/// </summary>
/// <param name="buffer">Binary buffer containing the encoded media data.</param>
/// <returns>Safe handle to the media embedding.</returns>
/// <exception cref="ObjectDisposedException">The model handle has been disposed.</exception>
/// <exception cref="RuntimeError">The native loader failed to ingest the buffer contents.</exception>
public SafeMtmdEmbed LoadMediaFromBuffer(ReadOnlySpan<byte> buffer)
{
    EnsureNotDisposed();

    var media = SafeMtmdEmbed.FromMediaBuffer(this, buffer);
    if (media is null)
        throw new RuntimeError("Failed to load media from buffer.");

    // Only successfully created embeddings are queued.
    _pendingMedia.Add(media);
    return media;
}
87101
88102 /// <summary>
89- /// Clears any pending media buffers tracked for tokenization.
103+ /// Disposes and clears any media buffers currently queued for tokenization.
90104 /// </summary>
91105 public void ClearMedia ( )
92106 {
@@ -98,15 +112,23 @@ public void ClearMedia()
98112 /// <summary>
99113 /// Tokenize a prompt alongside the pending media buffers. Pending media is cleared on success.
100114 /// </summary>
115+ /// <param name="text">Prompt text to tokenize.</param>
116+ /// <param name="addSpecial">Whether to append special tokens automatically.</param>
117+ /// <param name="parseSpecial">Whether special tokens should be treated as user-provided text.</param>
118+ /// <param name="chunks">Receives the native chunk collection when tokenization succeeds.</param>
119+ /// <returns>Zero on success; otherwise the native mtmd tokenize error code.</returns>
120+ /// <exception cref="ObjectDisposedException">The model handle has been disposed.</exception>
101121 public int Tokenize ( string text , bool addSpecial , bool parseSpecial , out SafeMtmdInputChunks ? chunks )
102122 {
103123 EnsureNotDisposed ( ) ;
104124
105125 chunks = null ;
126+ // Allocate the chunk container before invoking the native tokenizer.
106127 var output = NativeApi . mtmd_input_chunks_init ( ) ;
107128 if ( output == IntPtr . Zero )
108129 throw new RuntimeError ( "Failed to allocate mtmd_input_chunks." ) ;
109130
131+ // Collect native pointers to the queued media embeddings.
110132 var bitmapHandles = new IntPtr [ _pendingMedia . Count ] ;
111133 for ( var i = 0 ; i < _pendingMedia . Count ; i ++ )
112134 bitmapHandles [ i ] = _pendingMedia [ i ] . NativePtr ;
@@ -138,6 +160,14 @@ public int Tokenize(string text, bool addSpecial, bool parseSpecial, out SafeMtm
138160 /// <summary>
139161 /// Evaluate a batch of chunks using the helper (mirrors mtmd-helper eval logic).
140162 /// </summary>
163+ /// <param name="chunks">Chunk collection produced by <see cref="Tokenize"/>.</param>
164+ /// <param name="llamaContext">Context handle that receives the evaluated tokens.</param>
165+ /// <param name="nPast">Number of past tokens; updated when evaluation succeeds.</param>
166+ /// <param name="seqId">Sequence identifier used for KV cache management.</param>
167+ /// <param name="nBatch">Maximum number of tokens to evaluate in a single batch.</param>
168+ /// <param name="logitsLast">Whether to request logits for the last token only.</param>
169+ /// <returns>Zero on success; otherwise the native helper error code.</returns>
170+ /// <exception cref="ArgumentNullException">Thrown when required handles are null.</exception>
141171 public int EvaluateChunks ( SafeMtmdInputChunks chunks , SafeLLamaContextHandle llamaContext , ref long nPast , int seqId , int nBatch , bool logitsLast )
142172 {
143173 EnsureNotDisposed ( ) ;
@@ -166,6 +196,14 @@ public int EvaluateChunks(SafeMtmdInputChunks chunks, SafeLLamaContextHandle lla
166196 /// <summary>
167197 /// Evaluate a single chunk helper.
168198 /// </summary>
199+ /// <param name="chunkPtr">Pointer to the chunk to evaluate.</param>
200+ /// <param name="llamaContext">Context handle that receives the evaluated tokens.</param>
201+ /// <param name="nPast">Number of past tokens; updated when evaluation succeeds.</param>
202+ /// <param name="seqId">Sequence identifier used for KV cache management.</param>
203+ /// <param name="nBatch">Maximum number of tokens to evaluate in a single batch.</param>
204+ /// <param name="logitsLast">Whether to request logits for the last token only.</param>
205+ /// <returns>Zero on success; otherwise the native helper error code.</returns>
206+ /// <exception cref="ArgumentNullException">Thrown when required handles are null.</exception>
169207 public int EvaluateChunk ( IntPtr chunkPtr , SafeLLamaContextHandle llamaContext , ref long nPast , int seqId , int nBatch , bool logitsLast )
170208 {
171209 EnsureNotDisposed ( ) ;
@@ -194,10 +232,21 @@ public int EvaluateChunk(IntPtr chunkPtr, SafeLLamaContextHandle llamaContext, r
194232 /// <summary>
195233 /// Decode a prepared image chunk whose embedding is already computed.
196234 /// </summary>
235+ /// <param name="chunkPtr">Pointer to the chunk whose embedding should be decoded.</param>
236+ /// <param name="llamaContext">Context handle used for decoding.</param>
237+ /// <param name="encodedEmbeddings">Pointer to the pre-computed embedding data.</param>
238+ /// <param name="nPast">Number of past tokens; updated when evaluation succeeds.</param>
239+ /// <param name="seqId">Sequence identifier used for KV cache management.</param>
240+ /// <param name="nBatch">Maximum number of tokens to evaluate in a single batch.</param>
241+ /// <returns>Zero on success; otherwise the native helper error code.</returns>
242+ /// <exception cref="ArgumentNullException">Thrown when required handles are null.</exception>
197243 public int DecodeImageChunk ( IntPtr chunkPtr , SafeLLamaContextHandle llamaContext , IntPtr encodedEmbeddings , ref long nPast , int seqId , int nBatch )
198244 {
199245 EnsureNotDisposed ( ) ;
200246
247+ if ( chunkPtr == IntPtr . Zero )
248+ throw new ArgumentNullException ( nameof ( chunkPtr ) ) ;
249+
201250 var result = NativeApi . mtmd_helper_decode_image_chunk (
202251 DangerousGetHandle ( ) ,
203252 llamaContext ? . DangerousGetHandle ( ) ?? throw new ArgumentNullException ( nameof ( llamaContext ) ) ,
@@ -214,13 +263,23 @@ public int DecodeImageChunk(IntPtr chunkPtr, SafeLLamaContextHandle llamaContext
214263 return result ;
215264 }
216265
/// <summary>
/// Query the native helper for the number of tokens in the given chunk collection.
/// </summary>
/// <param name="chunks">Chunk collection produced by <see cref="Tokenize"/>.</param>
/// <returns>Total token count, widened to <see cref="ulong"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="chunks"/> is <c>null</c>.</exception>
public ulong CountTokens(SafeMtmdInputChunks chunks)
{
    if (chunks is null)
    {
        throw new ArgumentNullException(nameof(chunks));
    }

    // The helper returns a pointer-sized count; convert it to a fixed-width value.
    var nativeCount = NativeApi.mtmd_helper_get_n_tokens(chunks.NativePtr);
    return nativeCount.ToUInt64();
}
223277
278+ /// <summary>
279+ /// Get the number of positions contained in the provided chunk collection.
280+ /// </summary>
281+ /// <param name="chunks">Chunk collection produced by <see cref="Tokenize"/>.</param>
282+ /// <returns>Total number of positional slots consumed.</returns>
224283 public long CountPositions ( SafeMtmdInputChunks chunks )
225284 {
226285 if ( chunks == null )
@@ -231,7 +290,7 @@ public long CountPositions(SafeMtmdInputChunks chunks)
231290 #region native API
232291
233292 // mtmd_init_from_file(const char * mmproj_fname, const struct llama_model * text_model, const struct mtmd_context_params ctx_params);
234- // We don't know llama_model layout. Accept IntPtr for text_model .
293+ // The llama_model layout is opaque; expose it via SafeLlamaModelHandle to match the managed wrapper .
235294 [ DllImport ( NativeApi . mtmdLibraryName , EntryPoint = "mtmd_init_from_file" , CallingConvention = CallingConvention . Cdecl ) ]
236295 private static extern unsafe SafeMtmdModelHandle mtmd_init_from_file (
237296 byte * mmproj_fname ,
@@ -245,15 +304,37 @@ private static extern unsafe SafeMtmdModelHandle mtmd_init_from_file(
245304
246305
247306
/// <summary>
/// Finalizer to ensure native resources are released if Dispose was not called.
/// </summary>
~SafeMtmdModelHandle()
{
    // Finalizer path: use the non-disposing overload. The public Dispose() signals that
    // managed state may be touched and calls GC.SuppressFinalize, neither of which is
    // appropriate while the object is already being finalized.
    // NOTE(review): assumes the base type follows the SafeHandle Dispose(bool) contract — confirm.
    Dispose(false);
}
252314
/// <summary>
/// Indicates whether the model decodes using the non-causal path.
/// </summary>
/// <returns>The value reported by the native <c>mtmd_decode_use_non_causal</c> call.</returns>
/// <exception cref="ObjectDisposedException">The model handle has been disposed.</exception>
public bool DecodeUseNonCausal()
{
    // Guard first so a released native pointer is never passed to the native library.
    EnsureNotDisposed();
    return NativeApi.mtmd_decode_use_non_causal(handle);
}
319+
/// <summary>
/// Indicates whether the model decodes using M-RoPE.
/// </summary>
/// <returns>The value reported by the native <c>mtmd_decode_use_mrope</c> call.</returns>
/// <exception cref="ObjectDisposedException">The model handle has been disposed.</exception>
public bool DecodeUseMRope()
{
    // Guard first so a released native pointer is never passed to the native library.
    EnsureNotDisposed();
    return NativeApi.mtmd_decode_use_mrope(handle);
}
324+
/// <summary>
/// Indicates whether the model supports vision inputs.
/// </summary>
/// <returns>The value reported by the native <c>mtmd_support_vision</c> call.</returns>
/// <exception cref="ObjectDisposedException">The model handle has been disposed.</exception>
public bool SupportVision()
{
    // Guard first so a released native pointer is never passed to the native library.
    EnsureNotDisposed();
    return NativeApi.mtmd_support_vision(handle);
}
329+
/// <summary>
/// Indicates whether the model supports audio inputs.
/// </summary>
/// <returns>The value reported by the native <c>mtmd_support_audio</c> call.</returns>
/// <exception cref="ObjectDisposedException">The model handle has been disposed.</exception>
public bool SupportAudio()
{
    // Guard first so a released native pointer is never passed to the native library.
    EnsureNotDisposed();
    return NativeApi.mtmd_support_audio(handle);
}
334+
/// <summary>
/// Gets the audio bitrate advertised by the model.
/// </summary>
/// <returns>
/// The integer reported by the native <c>mtmd_get_audio_bitrate</c> call.
/// NOTE(review): units and the value returned for models without audio support are not
/// visible here — confirm against the native header.
/// </returns>
/// <exception cref="ObjectDisposedException">The model handle has been disposed.</exception>
public int GetAudioBitrate()
{
    // Guard first so a released native pointer is never passed to the native library.
    EnsureNotDisposed();
    return NativeApi.mtmd_get_audio_bitrate(handle);
}
258339
259340 private void EnsureNotDisposed ( )
0 commit comments