1+ using System . Collections . Generic ;
2+ using System . IO ;
13using System . Text . RegularExpressions ;
24using LLama . Common ;
35using Spectre . Console ;
68
79namespace LLama . Examples . Examples
810{
9- // This example shows how to chat with LLaVA model with both image and text as input.
11+ // This example shows how to chat with an Mtmd model using both image and text as input.
1012 // It uses the interactive executor to run inference.
1113 public class MtmdInteractiveModeExecute
1214 {
@@ -15,7 +17,7 @@ public static async Task Run()
1517 string multiModalProj = UserSettings . GetMMProjPath ( ) ;
1618 string modelPath = UserSettings . GetModelPath ( ) ;
1719 string modelImage = UserSettings . GetImagePath ( ) ;
18- const int maxTokens = 1024 ;
20+ const int maxTokens = 2048 ;
1921
2022 var prompt = $ "{{{modelImage}}}\n USER:\n Provide a full description of the image.\n ASSISTANT:\n ";
2123
@@ -44,39 +46,57 @@ public static async Task Run()
4446 Temperature = 0.1f
4547 } ,
4648
47- AntiPrompts = new List < string > { "\n USER :" } ,
49+ AntiPrompts = new List < string > { "\n ASSISTANT :" } ,
4850 MaxTokens = maxTokens
4951
5052 } ;
5153
5254 do
5355 {
5456
55- // Evaluate if we have images
57+ // Evaluate if we have media
5658 //
57- var imageMatches = Regex . Matches ( prompt , "{([^}]*)}" ) . Select ( m => m . Value ) ;
58- var imageCount = imageMatches . Count ( ) ;
59- var hasImages = imageCount > 0 ;
59+ var mediaMatches = Regex . Matches ( prompt , "{([^}]*)}" ) . Select ( m => m . Value ) ;
60+ var mediaCount = mediaMatches . Count ( ) ;
61+ var hasMedia = mediaCount > 0 ;
6062
61- if ( hasImages )
63+ if ( hasMedia )
6264 {
63- var imagePathsWithCurlyBraces = Regex . Matches ( prompt , "{([^}]*)}" ) . Select ( m => m . Value ) ;
64- var imagePaths = Regex . Matches ( prompt , "{([^}]*)}" ) . Select ( m => m . Groups [ 1 ] . Value ) . ToList ( ) ;
65+ var mediaPathsWithCurlyBraces = Regex . Matches ( prompt , "{([^}]*)}" ) . Select ( m => m . Value ) ;
66+ var mediaPaths = Regex . Matches ( prompt , "{([^}]*)}" ) . Select ( m => m . Groups [ 1 ] . Value ) . ToList ( ) ;
6567
66- var bitmaps = new List < SafeMtmdEmbed > ( ) ;
68+ var embeds = new List < SafeMtmdEmbed > ( ) ;
69+ var imageList = new List < byte [ ] > ( ) ;
70+ var imageExtensions = new HashSet < string > ( StringComparer . OrdinalIgnoreCase )
71+ {
72+ ".png" ,
73+ ".jpg" ,
74+ ".jpeg" ,
75+ ".bmp" ,
76+ ".gif" ,
77+ ".webp"
78+ } ;
79+
6780 try
6881 {
69- foreach ( var imagePath in imagePaths )
82+ foreach ( var mediaPath in mediaPaths )
7083 {
71- var bitmap = clipModel . LoadMedia ( imagePath ) ;
72- bitmaps . Add ( bitmap ) ;
84+ var extension = Path . GetExtension ( mediaPath ) ;
85+ if ( ! string . IsNullOrEmpty ( extension ) && imageExtensions . Contains ( extension ) )
86+ {
87+ // Keep the raw image bytes so the images can be rendered in the console preview.
88+ imageList . Add ( File . ReadAllBytes ( mediaPath ) ) ;
89+ }
90+
91+ var embed = clipModel . LoadMedia ( mediaPath ) ;
92+ embeds . Add ( embed ) ;
7393 }
7494 }
7595 catch ( IOException exception )
7696 {
7797 Console . ForegroundColor = ConsoleColor . Red ;
7898 Console . Write (
79- $ "Could not load your { ( imageCount == 1 ? "image " : "images " ) } :") ;
99+ $ "Could not load your { ( mediaCount == 1 ? "media " : "medias " ) } :") ;
80100 Console . Write ( $ "{ exception . Message } ") ;
81101 Console . ForegroundColor = ConsoleColor . Yellow ;
82102 Console . WriteLine ( "Please try again." ) ;
@@ -91,7 +111,7 @@ public static async Task Run()
91111 ex . Context . NativeHandle . MemorySequenceRemove ( LLamaSeqId . Zero , - 1 , - 1 ) ;
92112
93113 // Replace placeholders with media markers (one marker per image)
94- foreach ( var path in imagePathsWithCurlyBraces )
114+ foreach ( var path in mediaPathsWithCurlyBraces )
95115 {
96116 prompt = prompt . Replace ( path , mediaMarker , StringComparison . Ordinal ) ;
97117 }
@@ -100,11 +120,11 @@ public static async Task Run()
100120 Console . WriteLine ( $ "Here are the images, that are sent to the chat model in addition to your message.") ;
101121 Console . WriteLine ( ) ;
102122
103- // foreach (var consoleImage in bitmaps .Select(embed => new CanvasImage(embed.GetDataSpan() .ToArray())))
104- // {
105- // consoleImage.MaxWidth = 50;
106- // AnsiConsole.Write(consoleImage);
107- // }
123+ foreach ( var consoleImage in imageList . Select ( image => new CanvasImage ( image . ToArray ( ) ) ) )
124+ {
125+ consoleImage . MaxWidth = 50 ;
126+ AnsiConsole . Write ( consoleImage ) ;
127+ }
108128
109129 Console . WriteLine ( ) ;
110130 Console . ForegroundColor = ConsoleColor . Yellow ;
@@ -116,7 +136,7 @@ public static async Task Run()
116136 // Initialize media embeds in executor
117137 //
118138 ex . Embeds . Clear ( ) ;
119- foreach ( var embed in bitmaps )
139+ foreach ( var embed in embeds )
120140 ex . Embeds . Add ( embed ) ;
121141 }
122142
0 commit comments