Skip to content

Commit af399a7

Browse files
committed
Render images
1 parent 11dee17 commit af399a7

File tree

1 file changed

+42
-22
lines changed

1 file changed

+42
-22
lines changed

LLama.Examples/Examples/MtmdInteractiveModeExecute.cs

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
using System.Collections.Generic;
2+
using System.IO;
13
using System.Text.RegularExpressions;
24
using LLama.Common;
35
using Spectre.Console;
@@ -6,7 +8,7 @@
68

79
namespace LLama.Examples.Examples
810
{
9-
// This example shows how to chat with LLaVA model with both image and text as input.
11+
// This example shows how to chat with an Mtmd model using both image and text as input.
1012
// It uses the interactive executor to run inference.
1113
public class MtmdInteractiveModeExecute
1214
{
@@ -15,7 +17,7 @@ public static async Task Run()
1517
string multiModalProj = UserSettings.GetMMProjPath();
1618
string modelPath = UserSettings.GetModelPath();
1719
string modelImage = UserSettings.GetImagePath();
18-
const int maxTokens = 1024;
20+
const int maxTokens = 2048;
1921

2022
var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n";
2123

@@ -44,39 +46,57 @@ public static async Task Run()
4446
Temperature = 0.1f
4547
},
4648

47-
AntiPrompts = new List<string> { "\nUSER:" },
49+
AntiPrompts = new List<string> { "\nASSISTANT:" },
4850
MaxTokens = maxTokens
4951

5052
};
5153

5254
do
5355
{
5456

55-
// Evaluate if we have images
57+
// Evaluate if we have media
5658
//
57-
var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value);
58-
var imageCount = imageMatches.Count();
59-
var hasImages = imageCount > 0;
59+
var mediaMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value);
60+
var mediaCount = mediaMatches.Count();
61+
var hasMedia = mediaCount > 0;
6062

61-
if (hasImages)
63+
if (hasMedia)
6264
{
63-
var imagePathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value);
64-
var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList();
65+
var mediaPathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value);
66+
var mediaPaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList();
6567

66-
var bitmaps = new List<SafeMtmdEmbed>();
68+
var embeds = new List<SafeMtmdEmbed>();
69+
var imageList = new List<byte[]>();
70+
var imageExtensions = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
71+
{
72+
".png",
73+
".jpg",
74+
".jpeg",
75+
".bmp",
76+
".gif",
77+
".webp"
78+
};
79+
6780
try
6881
{
69-
foreach (var imagePath in imagePaths)
82+
foreach (var mediaPath in mediaPaths)
7083
{
71-
var bitmap = clipModel.LoadMedia(imagePath);
72-
bitmaps.Add(bitmap);
84+
var extension = Path.GetExtension(mediaPath);
85+
if (!string.IsNullOrEmpty(extension) && imageExtensions.Contains(extension))
86+
{
87+
// Keep the raw image bytes so the images can be rendered in the console further below.
88+
imageList.Add(File.ReadAllBytes(mediaPath));
89+
}
90+
91+
var embed = clipModel.LoadMedia(mediaPath);
92+
embeds.Add(embed);
7393
}
7494
}
7595
catch (IOException exception)
7696
{
7797
Console.ForegroundColor = ConsoleColor.Red;
7898
Console.Write(
79-
$"Could not load your {(imageCount == 1 ? "image" : "images")}:");
99+
$"Could not load your {(mediaCount == 1 ? "media" : "medias")}:");
80100
Console.Write($"{exception.Message}");
81101
Console.ForegroundColor = ConsoleColor.Yellow;
82102
Console.WriteLine("Please try again.");
@@ -91,7 +111,7 @@ public static async Task Run()
91111
ex.Context.NativeHandle.MemorySequenceRemove( LLamaSeqId.Zero, -1, -1 );
92112

93113
// Replace placeholders with media markers (one marker per image)
94-
foreach (var path in imagePathsWithCurlyBraces)
114+
foreach (var path in mediaPathsWithCurlyBraces)
95115
{
96116
prompt = prompt.Replace(path, mediaMarker, StringComparison.Ordinal);
97117
}
@@ -100,11 +120,11 @@ public static async Task Run()
100120
Console.WriteLine($"Here are the images, that are sent to the chat model in addition to your message.");
101121
Console.WriteLine();
102122

103-
// foreach (var consoleImage in bitmaps.Select(embed => new CanvasImage(embed.GetDataSpan().ToArray())))
104-
// {
105-
// consoleImage.MaxWidth = 50;
106-
// AnsiConsole.Write(consoleImage);
107-
// }
123+
foreach (var consoleImage in imageList.Select(image => new CanvasImage(image.ToArray())))
124+
{
125+
consoleImage.MaxWidth = 50;
126+
AnsiConsole.Write(consoleImage);
127+
}
108128

109129
Console.WriteLine();
110130
Console.ForegroundColor = ConsoleColor.Yellow;
@@ -116,7 +136,7 @@ public static async Task Run()
116136
// Initialize the media embeds in the executor
117137
//
118138
ex.Embeds.Clear();
119-
foreach (var embed in bitmaps)
139+
foreach (var embed in embeds)
120140
ex.Embeds.Add(embed);
121141
}
122142

0 commit comments

Comments
 (0)