Add Florence task descriptions

saddam213 · saddam213 · commit f1471bbe9839 · 2025-09-25T16:26:26.000+12:00
diff --git a/.gitignore b/.gitignore
@@ -350,7 +350,7 @@ site/
 docker-test-output/*
 
 Examples/*
-TensorStack.UI.WPF/*
+TensorStack.WPF/*
 TensorStackFull.sln
 TensorStack.Diffusers/*
 TensorStudio/*
diff --git a/TensorStack.Common/ExecutionProvider.cs b/TensorStack.Common/ExecutionProvider.cs
@@ -0,0 +1,26 @@
+﻿// Copyright (c) TensorStack. All rights reserved.
+// Licensed under the Apache 2.0 License.
+using System;
+using Microsoft.ML.OnnxRuntime;
+
+namespace TensorStack.Common
+{
+    /// <summary>Pairs a provider name with the factory that builds SessionOptions for a model.</summary>
+    public class ExecutionProvider
+    {
+        private readonly Func<ModelConfig, SessionOptions> _sessionOptionsFactory;
+
+        /// <param name="name">The provider name exposed by <see cref="Name"/>.</param>
+        /// <param name="sessionOptionsFactory">Invoked by <see cref="CreateSession"/> with the model configuration.</param>
+        public ExecutionProvider(string name, Func<ModelConfig, SessionOptions> sessionOptionsFactory)
+        {
+            Name = name;
+            _sessionOptionsFactory = sessionOptionsFactory;
+        }
+
+        public string Name { get; }
+
+        /// <summary>Returns the SessionOptions the factory produces for <paramref name="modelConfig"/>.</summary>
+        public SessionOptions CreateSession(ModelConfig modelConfig) => _sessionOptionsFactory(modelConfig);
+    }
+}
diff --git a/TensorStack.Common/ModelConfig.cs b/TensorStack.Common/ModelConfig.cs
@@ -1,8 +1,6 @@
 ﻿// Copyright (c) TensorStack. All rights reserved.
 // Licensed under the Apache 2.0 License.
-using System;
 using System.Text.Json.Serialization;
-using Microsoft.ML.OnnxRuntime;
 
 namespace TensorStack.Common
 {
@@ -22,23 +20,4 @@ public virtual void SetProvider(ExecutionProvider executionProvider)
             ExecutionProvider = executionProvider;
         }
     }
-
-    public class ExecutionProvider
-    {
-        private readonly string _name;
-        private readonly Func<ModelConfig, SessionOptions> _sessionOptionsFactory;
-
-        public ExecutionProvider(string name, Func<ModelConfig, SessionOptions> sessionOptionsFactory)
-        {
-            _name = name;
-            _sessionOptionsFactory = sessionOptionsFactory;
-        }
-
-        public string Name => _name;
-
-        public SessionOptions CreateSession(ModelConfig modelConfig)
-        {
-            return _sessionOptionsFactory(modelConfig);
-        }
-    }
 }
diff --git a/TensorStack.TextGeneration/ITextGeneration.cs b/TensorStack.TextGeneration/ITextGeneration.cs
@@ -0,0 +1,11 @@
+﻿using TensorStack.Common.Pipeline;
+using TensorStack.TextGeneration.Common;
+
+namespace TensorStack.TextGeneration
+{
+    /// <summary>
+    /// Combines the IPipeline contracts for GenerateOptions (GenerateResult) and SearchOptions (GenerateResult[]).
+    /// </summary>
+    public interface ITextGeneration : IPipeline<GenerateResult, GenerateOptions>, IPipeline<GenerateResult[], SearchOptions>
+    { }
+}
diff --git a/TensorStack.TextGeneration/Pipelines/DecoderPipeline.cs b/TensorStack.TextGeneration/Pipelines/DecoderPipeline.cs
@@ -29,9 +29,8 @@ public abstract class DecoderPipeline : IDisposable
         /// <param name="decoderConfig">The decoder configuration.</param>
         public DecoderPipeline(ITokenizer tokenizer, DecoderConfig decoderConfig)
         {
-            _decoderConfig = decoderConfig;
-     
             _tokenizer = tokenizer;
+            _decoderConfig = decoderConfig;
             _decoder = new ModelSession(_decoderConfig);
             _sequenceComparer = new SequenceComparer(_tokenizer.SpecialTokens, 5);
         }
@@ -126,7 +125,7 @@ protected virtual async Task<Sequence> GreedySearchAsync(GenerateOptions options
                     logitsProcessor.Process(sequence.Tokens, logits);
 
                 // Sample
-                var sample = sampler.Sample(logits,  temperature: options.Temperature).First();
+                var sample = sampler.Sample(logits, temperature: options.Temperature).First();
                 sequence.Tokens.Add(sample.TokenId);
                 sequence.Score += sample.Score;
 
diff --git a/TensorStack.TextGeneration/Pipelines/EncoderDecoderPipeline.cs b/TensorStack.TextGeneration/Pipelines/EncoderDecoderPipeline.cs
@@ -1,22 +1,16 @@
 // Copyright (c) TensorStack. All rights reserved.
 // Licensed under the Apache 2.0 License.
-using System;
-using System.Collections.Generic;
 using System.Linq;
-using System.Runtime.CompilerServices;
 using System.Threading;
 using System.Threading.Tasks;
 using TensorStack.Common;
-using TensorStack.Common.Pipeline;
 using TensorStack.Common.Tensor;
 using TensorStack.TextGeneration.Common;
 using TensorStack.TextGeneration.Processing;
 
 namespace TensorStack.TextGeneration.Pipelines
 {
-    public abstract class EncoderDecoderPipeline : DecoderPipeline,
-        IPipeline<GenerateResult, GenerateOptions>,
-        IPipelineStream<GenerateResult, SearchOptions>
+    public abstract class EncoderDecoderPipeline : DecoderPipeline
     {
         /// <summary>
         /// Initializes a new instance of the <see cref="EncoderDecoderPipeline"/> class.
@@ -57,55 +51,6 @@ public override async Task UnloadAsync(CancellationToken cancellationToken = def
             await Encoder.UnloadAsync();
         }
 
-        /// <summary>
-        /// Run pipeline GreedySearch
-        /// </summary>
-        /// <param name="options">The options.</param>
-        /// <param name="progressCallback">The progress callback.</param>
-        /// <param name="cancellationToken">The cancellation token that can be used by other objects or threads to receive notice of cancellation.</param>
-        /// <returns>A Task&lt;GenerateResult&gt; representing the asynchronous operation.</returns>
-        public virtual async Task<GenerateResult> RunAsync(GenerateOptions options, IProgress<RunProgress> progressCallback = null, CancellationToken cancellationToken = default)
-        {
-            await TokenizePromptAsync(options);
-
-            var sequence = await GreedySearchAsync(options, cancellationToken);
-            using (sequence)
-            {
-                return new GenerateResult
-                {
-                    Score = sequence.Score,
-                    Result = Tokenizer.Decode(sequence.Tokens)
-                };
-            }
-        }
-
-
-        /// <summary>
-        /// Run pipeline BeamSearch
-        /// </summary>
-        /// <param name="options">The options.</param>
-        /// <param name="progressCallback">The progress callback.</param>
-        /// <param name="cancellationToken">The cancellation token that can be used by other objects or threads to receive notice of cancellation.</param>
-        /// <returns>A Task&lt;IAsyncEnumerable`1&gt; representing the asynchronous operation.</returns>
-        public virtual async IAsyncEnumerable<GenerateResult> RunAsync(SearchOptions options, IProgress<RunProgress> progressCallback = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
-        {
-            await TokenizePromptAsync(options);
-
-            var sequences = await BeamSearchAsync(options, cancellationToken);
-            foreach (var sequence in sequences)
-            {
-                using (sequence)
-                {
-                    yield return new GenerateResult
-                    {
-                        Beam = sequence.Id,
-                        Score = sequence.Score,
-                        Result = Tokenizer.Decode(sequence.Tokens)
-                    };
-                }
-            }
-        }
-
 
         /// <summary>
         /// Tokenize the prompt
diff --git a/TensorStack.TextGeneration/Pipelines/Florence/FlorenceOptions.cs b/TensorStack.TextGeneration/Pipelines/Florence/FlorenceOptions.cs
@@ -1,5 +1,6 @@
 ﻿// Copyright (c) TensorStack. All rights reserved.
 // Licensed under the Apache 2.0 License.
+using System.ComponentModel.DataAnnotations;
 using TensorStack.Common.Tensor;
 using TensorStack.Common.Vision;
 using TensorStack.TextGeneration.Common;
@@ -13,32 +14,62 @@ public record FlorenceOptions : GenerateOptions
         public CoordinateBox<float> Region { get; set; }
     }
 
+
     public record FlorenceSearchOptions : FlorenceOptions
     {
-        public FlorenceSearchOptions(){ }
+        public FlorenceSearchOptions() { }
         public FlorenceSearchOptions(FlorenceOptions options) : base(options) { }
     }
 
 
-
-
     public enum TaskType
     {
+        [Display(Name = "None", Description = "Free text prompt without a predefined task.")]
         NONE,
+
+        [Display(Name = "OCR", Description = "Reads all the visible text in an image.")]
         OCR,
+
+        [Display(Name = "OCR with Region", Description = "Reads text in an image and also gives the region where each piece of text is found.")]
         OCR_WITH_REGION,
+
+        [Display(Name = "Caption", Description = "Generates a short description of the overall image.")]
         CAPTION,
+
+        [Display(Name = "Detailed Caption", Description = "Produces a richer description of the image with more detail than a normal caption.")]
         DETAILED_CAPTION,
+
+        [Display(Name = "More Detailed Caption", Description = "Gives a very thorough and verbose description of the image.")]
         MORE_DETAILED_CAPTION,
+
+        [Display(Name = "Object Detection", Description = "Identifies and localizes objects in the image with bounding boxes.")]
         OD,
+
+        [Display(Name = "Dense Region Caption", Description = "Splits the image into many regions and generates a caption for each region.")]
         DENSE_REGION_CAPTION,
+
+        [Display(Name = "Caption to Phrase Grounding", Description = "Finds the specific region(s) in the image that correspond to a given phrase in a caption.")]
         CAPTION_TO_PHRASE_GROUNDING,
+
+        [Display(Name = "Referring Expression Segmentation", Description = "Given a phrase (e.g., 'the red car'), segments out that exact object region at the pixel level.")]
         REFERRING_EXPRESSION_SEGMENTATION,
+
+        [Display(Name = "Region to Segmentation", Description = "Converts a region (bounding box) into a precise pixel-level segmentation mask.")]
         REGION_TO_SEGMENTATION,
+
+        [Display(Name = "Open Vocabulary Detection", Description = "Detects objects of arbitrary categories, even ones not seen during training.")]
         OPEN_VOCABULARY_DETECTION,
+
+        [Display(Name = "Region to Category", Description = "Assigns a category label (e.g., 'cat', 'chair') to a given region.")]
         REGION_TO_CATEGORY,
+
+        [Display(Name = "Region to Description", Description = "Generates a natural-language description for a given region.")]
         REGION_TO_DESCRIPTION,
+
+        [Display(Name = "Region to OCR", Description = "Extracts text only from within a specified region.")]
         REGION_TO_OCR,
+
+        [Display(Name = "Region Proposal", Description = "Suggests candidate regions of interest in the image (without labeling them).")]
         REGION_PROPOSAL
     }
 }
diff --git a/TensorStack.TextGeneration/Pipelines/Florence/FlorencePipeline.cs b/TensorStack.TextGeneration/Pipelines/Florence/FlorencePipeline.cs
@@ -267,6 +267,26 @@ protected override void Dispose(bool disposing)
         /// <param name="visionModel">The vision model.</param>
         /// <returns>FlorencePipeline.</returns>
         public static FlorencePipeline Create(ExecutionProvider provider, string modelPath, FlorenceType modelType, string encoderModel = "encoder_model.onnx", string decoderModel = "decoder_model_merged.onnx", string embedModel = "embed_tokens.onnx", string visionModel = "vision_encoder.onnx")
+        {
+            return Create(provider, provider, provider, provider, modelPath, modelType, encoderModel, decoderModel, embedModel, visionModel);
+        }
+
+
+        /// <summary>
+        /// Creates a FlorencePipeline with the specified configuration.
+        /// </summary>
+        /// <param name="encoderProvider">The encoder provider.</param>
+        /// <param name="decoderProvider">The decoder provider.</param>
+        /// <param name="embedsProvider">The embeds provider.</param>
+        /// <param name="visionProvider">The vision provider.</param>
+        /// <param name="modelPath">The model path.</param>
+        /// <param name="modelType">Type of the model.</param>
+        /// <param name="encoderModel">The encoder model.</param>
+        /// <param name="decoderModel">The decoder model.</param>
+        /// <param name="embedModel">The embed model.</param>
+        /// <param name="visionModel">The vision model.</param>
+        /// <returns>FlorencePipeline.</returns>
+        public static FlorencePipeline Create(ExecutionProvider encoderProvider, ExecutionProvider decoderProvider, ExecutionProvider embedsProvider, ExecutionProvider visionProvider, string modelPath, FlorenceType modelType, string encoderModel = "encoder_model.onnx", string decoderModel = "decoder_model_merged.onnx", string embedModel = "embed_tokens.onnx", string visionModel = "vision_encoder.onnx")
         {
             var numLayers = 6;
             var numHeads = 12;
@@ -316,36 +336,11 @@ public static FlorencePipeline Create(ExecutionProvider provider, string modelPa
                 }
             };
 
-            config.EncoderConfig.SetProvider(provider);
-            //config.DecoderConfig.SetProvider(provider);
-            config.DecoderConfig.SetProvider(ProviderCPU()); // TODO
-            config.EmbedsConfig.SetProvider(provider);
-            config.VisionConfig.SetProvider(provider);
+            config.EncoderConfig.SetProvider(encoderProvider);
+            config.DecoderConfig.SetProvider(decoderProvider);
+            config.EmbedsConfig.SetProvider(embedsProvider);
+            config.VisionConfig.SetProvider(visionProvider);
             return new FlorencePipeline(config);
         }
-
-
-        private static ExecutionProvider ProviderCPU()
-        {
-            return new ExecutionProvider("CPU", configuration =>
-            {
-                var sessionOptions = new SessionOptions
-                {
-                    ExecutionMode = ExecutionMode.ORT_PARALLEL,
-                    EnableCpuMemArena = true,
-                    EnableMemoryPattern = true,
-                    GraphOptimizationLevel = GraphOptimizationLevel.ORT_DISABLE_ALL
-                };
-
-                sessionOptions.AppendExecutionProvider_CPU();
-                return sessionOptions;
-            });
-        }
-    }
-
-    public enum FlorenceType
-    {
-        Base = 0,
-        Large = 1
     }
 }
diff --git a/TensorStack.TextGeneration/Pipelines/Florence/FlorenceType.cs b/TensorStack.TextGeneration/Pipelines/Florence/FlorenceType.cs
@@ -0,0 +1,10 @@
+// Copyright (c) TensorStack. All rights reserved.
+// Licensed under the Apache 2.0 License.
+namespace TensorStack.TextGeneration.Pipelines.Florence
+{
+    /// <summary>Florence model type; passed as <c>modelType</c> to <c>FlorencePipeline.Create</c>.</summary>
+    public enum FlorenceType
+    {
+        Base = 0, Large = 1
+    }
+}
diff --git a/TensorStack.TextGeneration/Pipelines/Other/SummaryPipeline.cs b/TensorStack.TextGeneration/Pipelines/Other/SummaryPipeline.cs
diff --git a/TensorStack.TextGeneration/Pipelines/Phi/Phi3Pipeline.cs b/TensorStack.TextGeneration/Pipelines/Phi/Phi3Pipeline.cs
diff --git a/TensorStack.TextGeneration/Pipelines/Phi/PhiType.cs b/TensorStack.TextGeneration/Pipelines/Phi/PhiType.cs