
Commit 68ae07c

Review Executors
Interactive and Instruct executors seem to work. BatchedExecutor is not working at all.
1 parent af399a7

17 files changed: +817 −144 lines

LLama.Examples/ExampleRunner.cs

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ public class ExampleRunner
     { "Batched Executor: Save/Load", BatchedExecutorSaveAndLoad.Run },
     { "Batched Executor: Fork", BatchedExecutorFork.Run },
     { "Batched Executor: Rewind", BatchedExecutorRewind.Run },
-    // { "Batched Executor: LLava", BatchedExecutorLLava.Run },
+    { "Batched Executor: Mtmd", BatchedExecutorMtmd.Run },
     { "Batched Executor: BoolQ Benchmark", BatchedExecutorBoolQ.Run },
     { "Batched Executor: Beam Search", BatchedExecutorBeamSearch.Run },
     { "Custom Sampling Pipeline", CustomSampler.Run },

LLama.Examples/Examples/BatchedExecutorLLava.cs

Lines changed: 0 additions & 92 deletions
This file was deleted.
LLama.Examples/Examples/BatchedExecutorMtmd.cs

Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@
using System;
using System.Collections.Generic;
using System.IO;
using LLama.Batched;
using LLama.Common;
using LLama.Exceptions;
using LLama.Native;
using LLama.Sampling;
using Spectre.Console;

namespace LLama.Examples.Examples;

/// <summary>
/// Demonstrates how to evaluate an image with MTMD helpers and continue generation by
/// manually scheduling batches, similar to what the batched executor does internally.
/// </summary>
public class BatchedExecutorMtmd
{
    /// <summary>
    /// Number of completion tokens to generate after sending the image prompt.
    /// </summary>
    public const int TokenCount = 64;

    public static async Task Run()
    {
        var parameters = new ModelParams(UserSettings.GetModelPath());
        using var model = await LLamaWeights.LoadFromFileAsync(parameters);

        var mtmdParams = MtmdContextParams.Default();
        mtmdParams.UseGpu = false;
        var marker = mtmdParams.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>";

        using var mtmd = await SafeMtmdWeights.LoadFromFileAsync(UserSettings.GetMMProjPath(), model, mtmdParams);

        using var executor = new BatchedExecutor(model, parameters, mtmd);

        var defaultPrompt = "\nUSER: Provide a full description of the image.\nASSISTANT: ";
        var promptSuffix = AnsiConsole.Ask("Prompt (or ENTER for default):", defaultPrompt);
        var promptText = string.Concat(marker, promptSuffix);

        var imagePath = UserSettings.GetImagePath();
        AnsiConsole.Write(new CanvasImage(imagePath));

        var vocab = executor.Context.NativeHandle.ModelHandle.Vocab;

        var sampler = new DefaultSamplingPipeline
        {
            Temperature = 0.1f
        };

        var decoder = new StreamingTokenDecoder(executor.Context)
        {
            DecodeSpecialTokens = false
        };

        try
        {
            var conversation = executor.Create();
            conversation.QueueMedia(imagePath);
            conversation.Prompt(promptText, addBos: true, special: true);

            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Prompt queued with multimodal chunks. Generating response...\n");
            Console.ResetColor();

            var remaining = TokenCount;
            while (remaining > 0)
            {
                var decodeResult = await executor.Infer();
                if (decodeResult == DecodeResult.NoKvSlot)
                {
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine("Insufficient KV cache space for multimodal evaluation.");
                    Console.ResetColor();
                    break;
                }

                if (decodeResult != DecodeResult.Ok)
                    throw new RuntimeError($"Failed to evaluate batch: {decodeResult}.");

                if (!conversation.RequiresSampling)
                    continue;

                var token = conversation.Sample(sampler);
                if (token.IsEndOfGeneration(vocab))
                    break;

                decoder.Add(token);
                var delta = decoder.Read();
                if (!string.IsNullOrEmpty(delta))
                    Console.Write(delta);

                sampler.Accept(token);
                conversation.Prompt(token);
                remaining--;
            }

            Console.WriteLine();
        }
        catch (IOException ex)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine($"Could not load media '{imagePath}': {ex.Message}");
            Console.ResetColor();
        }
        catch (RuntimeError ex)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine($"MTMD processing failed: {ex.Message}");
            Console.ResetColor();
        }
    }
}
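
The loop above follows the batched-executor contract: Infer() advances every queued batch, RequiresSampling gates conversations that produced logits, and each sampled token is fed back with Prompt(token). The same pattern extends to several conversations sharing one executor; a minimal sketch using only the calls shown in this file (prompts are illustrative, error handling omitted):

    // Sketch: two conversations on one BatchedExecutor, reusing the
    // Create/Prompt/Infer/Sample calls from the example above.
    var first = executor.Create();
    var second = executor.Create();
    first.Prompt("\nUSER: Describe the image.\nASSISTANT: ", addBos: true, special: true);
    second.Prompt("\nUSER: List the objects you see.\nASSISTANT: ", addBos: true, special: true);

    await executor.Infer(); // one call evaluates all queued batches (large prompts may need more)
    foreach (var conversation in new[] { first, second })
        if (conversation.RequiresSampling)
            conversation.Prompt(conversation.Sample(sampler));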

LLama.Examples/Examples/MtmdInteractiveModeExecute.cs

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ public static async Task Run()
     var parameters = new ModelParams(modelPath);
 
     var mtmdParameters = MtmdContextParams.Default();
+    mtmdParameters.UseGpu = false;
 
     using var model = await LLamaWeights.LoadFromFileAsync(parameters);
     using var context = model.CreateContext(parameters);
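
The example and the tests in this commit configure MTMD the same way: start from MtmdContextParams.Default(), override individual fields, then load the projector weights. A minimal sketch of that shared pattern, assuming a placeholder mmproj path (only fields exercised elsewhere in this diff are shown):

    // Sketch of the shared MTMD setup pattern; the path is a placeholder.
    var mtmdParams = MtmdContextParams.Default();
    mtmdParams.UseGpu = false;   // evaluate the projector on CPU
    mtmdParams.NThreads = 4;     // worker threads for media encoding
    var marker = mtmdParams.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>";

    using var mtmd = await SafeMtmdWeights.LoadFromFileAsync("path/to/mmproj.gguf", model, mtmdParams);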

LLama.Examples/LLama.Examples.csproj

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@
   <!-- Set IncludeBuiltInRuntimes to false to include your own runtime libraries and not link the defaults -->
   <IncludeBuiltInRuntimes>true</IncludeBuiltInRuntimes>
   <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-  <LangVersion>12</LangVersion>
+  <LangVersion>13</LangVersion>
   <NoWarn>1701;1702;8604;SKEXP0001;SKEXP0050;SKEXP0052;SKEXP0003</NoWarn>
 </PropertyGroup>
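
Bumping LangVersion opts the examples project into C# 13. One feature this makes available is params collections, where params parameters can bind to span and collection types instead of only arrays; an illustrative snippet, not used anywhere in this commit:

    // C# 13 params collections: params now accepts ReadOnlySpan<T> and
    // other collection types. Illustrative only.
    static int SumTokens(params ReadOnlySpan<int> tokenIds)
    {
        var total = 0;
        foreach (var id in tokenIds)
            total += id;
        return total;
    }

    var total = SumTokens(1, 2, 3); // bound to a stack-allocated span, no array allocation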

LLama.Unittest/MtmdExecutorTests.cs

Lines changed: 81 additions & 0 deletions
@@ -0,0 +1,81 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using LLama.Common;
using LLama.Native;
using Microsoft.Extensions.Logging.Abstractions;
using Xunit;

namespace LLama.Unittest;

[Trait("Category", "NoCI")]
public class MtmdExecutorTests : IDisposable
{
    private readonly LLamaWeights _weights;
    private readonly MtmdContextParams _mtmdParams;
    private readonly SafeMtmdWeights _mtmd;
    private readonly ModelParams _modelParams;

    public MtmdExecutorTests()
    {
        _modelParams = new ModelParams(Constants.MtmdModelPath)
        {
            ContextSize = 1024 * 8,
            GpuLayerCount = Constants.CIGpuLayerCount,
        };

        _weights = LLamaWeights.LoadFromFile(_modelParams);

        _mtmdParams = MtmdContextParams.Default();
        _mtmdParams.NThreads = Math.Max(1, Constants.CIGpuLayerCount);
        _mtmdParams.UseGpu = false;

        _mtmd = SafeMtmdWeights.LoadFromFile(Constants.MtmdMmpPath, _weights, _mtmdParams);
    }

    public void Dispose()
    {
        _mtmd.Dispose();
        _weights.Dispose();
    }

    [Fact]
    public async Task InteractiveExecutor_EvaluateChunks_DoesNotRetokenize()
    {
        using var context = _weights.CreateContext(_modelParams, NullLogger.Instance);
        var executor = new InteractiveExecutor(context, _mtmd, NullLogger.Instance);
        var marker = _mtmdParams.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>";
        var prompt = $"{marker}\nDescribe the image succinctly.";

        executor.Embeds.Add(_mtmd.LoadMedia(Constants.MtmdImage));

        await foreach (var _ in executor.InferAsync(prompt, new InferenceParams { MaxTokens = 0 }))
        {
            Assert.True(false, "Prefill should not emit generated text");
        }

        var diagnostics = executor.GetDiagnostics();
        Assert.Equal(diagnostics.EmbedCount, diagnostics.ConsumedCount);
        Assert.Equal(diagnostics.ConsumedCount, diagnostics.PastCount);
        Assert.Equal(0, diagnostics.PendingEmbedCount);
    }

    [Fact]
    public async Task InstructExecutor_MtmdPromptAdvancesPastTokensOnce()
    {
        using var context = _weights.CreateContext(_modelParams, NullLogger.Instance);
        var executor = new InstructExecutor(context, _mtmd, logger: NullLogger.Instance);
        executor.Embeds.Add(_mtmd.LoadMedia(Constants.MtmdImage));

        var prompt = $"{_mtmdParams.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>"} Provide details.";

        await foreach (var _ in executor.InferAsync(prompt, new InferenceParams { MaxTokens = 0 }))
        {
        }

        var diagnostics = executor.GetDiagnostics();
        Assert.Equal(diagnostics.EmbedCount, diagnostics.ConsumedCount);
        Assert.Equal(diagnostics.ConsumedCount, diagnostics.PastCount);
        Assert.Equal(0, diagnostics.PendingEmbedCount);
    }
}
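
The three assertions in each test encode a single invariant: every queued multimodal embed is consumed exactly once and advances the past-token count exactly once, leaving nothing pending. If more executor tests follow, a shared helper keeps them consistent; a hypothetical sketch, assuming the diagnostics shape used above (dynamic stands in for the concrete type returned by GetDiagnostics()):

    // Hypothetical helper; `dynamic` stands in for the concrete
    // diagnostics type returned by GetDiagnostics().
    private static void AssertPrefillConsumedOnce(dynamic diagnostics)
    {
        Assert.Equal(diagnostics.EmbedCount, diagnostics.ConsumedCount); // every embed consumed
        Assert.Equal(diagnostics.ConsumedCount, diagnostics.PastCount);  // n_past advanced once per token
        Assert.Equal(0, diagnostics.PendingEmbedCount);                  // queue drained
    }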
