Skip to content
This repository was archived by the owner on Nov 27, 2024. It is now read-only.

Commit c20a291

Browse files
committed
Support disabled classifier free guidance
1 parent 5aaa63c commit c20a291

File tree

6 files changed

+120
-86
lines changed

6 files changed

+120
-86
lines changed

OnnxStack.StableDiffusion/Common/IPromptService.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
using Microsoft.ML.OnnxRuntime.Tensors;
2+
using OnnxStack.StableDiffusion.Config;
23
using System.Threading.Tasks;
34

45
namespace OnnxStack.StableDiffusion.Common
56
{
67
public interface IPromptService
78
{
8-
Task<DenseTensor<float>> CreatePromptAsync(IModelOptions model, string prompt, string negativePrompt);
9+
Task<DenseTensor<float>> CreatePromptAsync(IModelOptions model, PromptOptions promptOptions, SchedulerOptions schedulerOptions);
910
Task<int[]> DecodeTextAsync(IModelOptions model, string inputText);
1011
Task<float[]> EncodeTokensAsync(IModelOptions model, int[] tokenizedInput);
1112
}

OnnxStack.StableDiffusion/Diffusers/DiffuserBase.cs

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,10 @@ public virtual async Task<DenseTensor<float>> DiffuseAsync(IModelOptions modelOp
6969
using (var scheduler = GetScheduler(promptOptions, schedulerOptions))
7070
{
7171
// Process prompts
72-
var promptEmbeddings = await _promptService.CreatePromptAsync(modelOptions, promptOptions.Prompt, promptOptions.NegativePrompt);
72+
var promptEmbeddings = await _promptService.CreatePromptAsync(modelOptions, promptOptions, schedulerOptions);
73+
74+
// Should we perform classifier-free guidance
75+
var performGuidance = schedulerOptions.GuidanceScale > 1.0f;
7376

7477
// Get timesteps
7578
var timesteps = GetTimesteps(promptOptions, schedulerOptions, scheduler);
@@ -84,25 +87,22 @@ public virtual async Task<DenseTensor<float>> DiffuseAsync(IModelOptions modelOp
8487
cancellationToken.ThrowIfCancellationRequested();
8588

8689
// Create input tensor.
87-
var inputTensor = scheduler.ScaleInput(latents.Duplicate(schedulerOptions.GetScaledDimension(2)), timestep);
90+
var inputLatent = performGuidance
91+
? latents.Repeat(1)
92+
: latents;
93+
var inputTensor = scheduler.ScaleInput(inputLatent, timestep);
8894

8995
// Create Input Parameters
90-
var inputNames = _onnxModelService.GetInputNames(modelOptions, OnnxModelType.Unet);
91-
var inputParameters = CreateInputParameters(
92-
NamedOnnxValue.CreateFromTensor(inputNames[0], inputTensor),
93-
NamedOnnxValue.CreateFromTensor(inputNames[1], new DenseTensor<long>(new long[] { timestep }, new int[] { 1 })),
94-
NamedOnnxValue.CreateFromTensor(inputNames[2], promptEmbeddings));
96+
var inputParameters = CreateUnetInputParams(modelOptions, inputTensor, promptEmbeddings, timestep);
9597

9698
// Run Inference
9799
using (var inferResult = await _onnxModelService.RunInferenceAsync(modelOptions, OnnxModelType.Unet, inputParameters))
98100
{
99101
var noisePred = inferResult.FirstElementAs<DenseTensor<float>>();
100102

101103
// Perform guidance
102-
if (schedulerOptions.GuidanceScale > 1.0f)
103-
{
104+
if (performGuidance)
104105
noisePred = PerformGuidance(noisePred, schedulerOptions.GuidanceScale);
105-
}
106106

107107
// Scheduler Step
108108
latents = scheduler.Step(noisePred, timestep, latents);
@@ -199,6 +199,24 @@ protected DenseTensor<float> PerformGuidance(DenseTensor<float> noisePrediction,
199199
}
200200

201201

202+
/// <summary>
203+
/// Creates the Unet input parameters.
204+
/// </summary>
205+
/// <param name="model">The model.</param>
206+
/// <param name="inputTensor">The input tensor.</param>
207+
/// <param name="promptEmbeddings">The prompt embeddings.</param>
208+
/// <param name="timestep">The timestep.</param>
209+
/// <returns></returns>
210+
protected virtual IReadOnlyList<NamedOnnxValue> CreateUnetInputParams(IModelOptions model, DenseTensor<float> inputTensor, DenseTensor<float> promptEmbeddings, int timestep)
211+
{
212+
var inputNames = _onnxModelService.GetInputNames(model, OnnxModelType.Unet);
213+
return CreateInputParameters(
214+
NamedOnnxValue.CreateFromTensor(inputNames[0], inputTensor),
215+
NamedOnnxValue.CreateFromTensor(inputNames[1], new DenseTensor<long>(new long[] { timestep }, new int[] { 1 })),
216+
NamedOnnxValue.CreateFromTensor(inputNames[2], promptEmbeddings));
217+
}
218+
219+
202220
/// <summary>
203221
/// Determines whether the specified result image is not NSFW.
204222
/// </summary>
@@ -286,9 +304,9 @@ protected static IScheduler GetScheduler(PromptOptions prompt, SchedulerOptions
286304
/// </summary>
287305
/// <param name="parameters">The parameters.</param>
288306
/// <returns></returns>
289-
protected static IReadOnlyCollection<NamedOnnxValue> CreateInputParameters(params NamedOnnxValue[] parameters)
307+
protected static IReadOnlyList<NamedOnnxValue> CreateInputParameters(params NamedOnnxValue[] parameters)
290308
{
291-
return parameters.ToList().AsReadOnly();
309+
return parameters.ToList();
292310
}
293311
}
294312
}

OnnxStack.StableDiffusion/Diffusers/InpaintDiffuser.cs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,10 @@ public override async Task<DenseTensor<float>> DiffuseAsync(IModelOptions modelO
4444
using (var scheduler = GetScheduler(promptOptions, schedulerOptions))
4545
{
4646
// Process prompts
47-
var promptEmbeddings = await _promptService.CreatePromptAsync(modelOptions, promptOptions.Prompt, promptOptions.NegativePrompt);
47+
var promptEmbeddings = await _promptService.CreatePromptAsync(modelOptions, promptOptions, schedulerOptions);
48+
49+
// Should we perform classifier-free guidance
50+
var performGuidance = schedulerOptions.GuidanceScale > 1.0f;
4851

4952
// Get timesteps
5053
var timesteps = GetTimesteps(promptOptions, schedulerOptions, scheduler);
@@ -67,26 +70,23 @@ public override async Task<DenseTensor<float>> DiffuseAsync(IModelOptions modelO
6770
cancellationToken.ThrowIfCancellationRequested();
6871

6972
// Create input tensor.
70-
var inputTensor = scheduler.ScaleInput(latents.Duplicate(schedulerOptions.GetScaledDimension(2)), timestep);
73+
var inputLatent = performGuidance
74+
? latents.Repeat(1)
75+
: latents;
76+
var inputTensor = scheduler.ScaleInput(inputLatent, timestep);
7177
inputTensor = ConcatenateLatents(inputTensor, maskedImage, maskImage);
7278

7379
// Create Input Parameters
74-
var inputNames = _onnxModelService.GetInputNames(modelOptions, OnnxModelType.Unet);
75-
var inputParameters = CreateInputParameters(
76-
NamedOnnxValue.CreateFromTensor(inputNames[0], inputTensor),
77-
NamedOnnxValue.CreateFromTensor(inputNames[1], new DenseTensor<long>(new long[] { timestep }, new int[] { 1 })),
78-
NamedOnnxValue.CreateFromTensor(inputNames[2], promptEmbeddings));
80+
var inputParameters = CreateUnetInputParams(modelOptions, inputTensor, promptEmbeddings, timestep);
7981

8082
// Run Inference
8183
using (var inferResult = await _onnxModelService.RunInferenceAsync(modelOptions, OnnxModelType.Unet, inputParameters))
8284
{
8385
var noisePred = inferResult.FirstElementAs<DenseTensor<float>>();
8486

8587
// Perform guidance
86-
if (schedulerOptions.GuidanceScale > 1.0f)
87-
{
88+
if (performGuidance)
8889
noisePred = PerformGuidance(noisePred, schedulerOptions.GuidanceScale);
89-
}
9090

9191
// Scheduler Step
9292
latents = scheduler.Step(noisePred, timestep, latents);

OnnxStack.StableDiffusion/Diffusers/InpaintLegacyDiffuser.cs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@ public override async Task<DenseTensor<float>> DiffuseAsync(IModelOptions modelO
3737
using (var scheduler = GetScheduler(promptOptions, schedulerOptions))
3838
{
3939
// Process prompts
40-
var promptEmbeddings = await _promptService.CreatePromptAsync(modelOptions, promptOptions.Prompt, promptOptions.NegativePrompt);
40+
var promptEmbeddings = await _promptService.CreatePromptAsync(modelOptions, promptOptions, schedulerOptions);
41+
42+
// Should we perform classifier-free guidance
43+
var performGuidance = schedulerOptions.GuidanceScale > 1.0f;
4144

4245
// Get timesteps
4346
var timesteps = GetTimesteps(promptOptions, schedulerOptions, scheduler);
@@ -53,33 +56,30 @@ public override async Task<DenseTensor<float>> DiffuseAsync(IModelOptions modelO
5356

5457
// Add noise to original latent
5558
var latents = scheduler.AddNoise(latentsOriginal, noise, timesteps);
56-
59+
5760
// Loop though the timesteps
5861
var step = 0;
5962
foreach (var timestep in timesteps)
6063
{
6164
cancellationToken.ThrowIfCancellationRequested();
6265

6366
// Create input tensor.
64-
var inputTensor = scheduler.ScaleInput(latents.Duplicate(schedulerOptions.GetScaledDimension(2)), timestep);
67+
var inputLatent = performGuidance
68+
? latents.Repeat(1)
69+
: latents;
70+
var inputTensor = scheduler.ScaleInput(inputLatent, timestep);
6571

6672
// Create Input Parameters
67-
var inputNames = _onnxModelService.GetInputNames(modelOptions, OnnxModelType.Unet);
68-
var inputParameters = CreateInputParameters(
69-
NamedOnnxValue.CreateFromTensor(inputNames[0], inputTensor),
70-
NamedOnnxValue.CreateFromTensor(inputNames[1], new DenseTensor<long>(new long[] { timestep }, new int[] { 1 })),
71-
NamedOnnxValue.CreateFromTensor(inputNames[2], promptEmbeddings));
73+
var inputParameters = CreateUnetInputParams(modelOptions, inputTensor, promptEmbeddings, timestep);
7274

7375
// Run Inference
7476
using (var inferResult = await _onnxModelService.RunInferenceAsync(modelOptions, OnnxModelType.Unet, inputParameters))
7577
{
7678
var noisePred = inferResult.FirstElementAs<DenseTensor<float>>();
7779

7880
// Perform guidance
79-
if (schedulerOptions.GuidanceScale > 1.0f)
80-
{
81+
if (performGuidance)
8182
noisePred = PerformGuidance(noisePred, schedulerOptions.GuidanceScale);
82-
}
8383

8484
// Scheduler Step
8585
var steplatents = scheduler.Step(noisePred, timestep, latents);

OnnxStack.StableDiffusion/Helpers/TensorHelper.cs

Lines changed: 47 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,6 @@ public static DenseTensor<float> AddTensors(this DenseTensor<float> tensor, Dens
109109
}
110110

111111

112-
113-
114-
115112
/// <summary>
116113
/// Sums the tensors.
117114
/// </summary>
@@ -242,20 +239,9 @@ public static DenseTensor<float> Abs(this DenseTensor<float> tensor)
242239
public static DenseTensor<float> Multiply(this DenseTensor<float> tensor1, DenseTensor<float> tensor2)
243240
{
244241
var result = new DenseTensor<float>(tensor1.Dimensions);
245-
for (int batch = 0; batch < tensor1.Dimensions[0]; batch++)
242+
for (int i = 0; i < tensor1.Length; i++)
246243
{
247-
for (int channel = 0; channel < tensor1.Dimensions[1]; channel++)
248-
{
249-
for (int height = 0; height < tensor1.Dimensions[2]; height++)
250-
{
251-
for (int width = 0; width < tensor1.Dimensions[3]; width++)
252-
{
253-
var value1 = tensor1[batch, channel, height, width];
254-
var value2 = tensor2[batch, channel, height, width];
255-
result[batch, channel, height, width] = value1 * value2;
256-
}
257-
}
258-
}
244+
result.SetValue(i, tensor1.GetValue(i) * tensor2.GetValue(i));
259245
}
260246
return result;
261247
}
@@ -270,25 +256,57 @@ public static DenseTensor<float> Multiply(this DenseTensor<float> tensor1, Dense
270256
public static DenseTensor<float> Divide(this DenseTensor<float> tensor1, DenseTensor<float> tensor2)
271257
{
272258
var result = new DenseTensor<float>(tensor1.Dimensions);
273-
for (int batch = 0; batch < tensor1.Dimensions[0]; batch++)
259+
for (int i = 0; i < tensor1.Length; i++)
274260
{
275-
for (int channel = 0; channel < tensor1.Dimensions[1]; channel++)
276-
{
277-
for (int height = 0; height < tensor1.Dimensions[2]; height++)
278-
{
279-
for (int width = 0; width < tensor1.Dimensions[3]; width++)
280-
{
281-
var value1 = tensor1[batch, channel, height, width];
282-
var value2 = tensor2[batch, channel, height, width];
283-
result[batch, channel, height, width] = value1 / value2;
284-
}
285-
}
286-
}
261+
result.SetValue(i, tensor1.GetValue(i) / tensor2.GetValue(i));
287262
}
288263
return result;
289264
}
290265

291266

267+
/// <summary>
268+
/// Concatenates the specified tensors along the 0 axis.
269+
/// </summary>
270+
/// <param name="tensor1">The tensor1.</param>
271+
/// <param name="tensor2">The tensor2.</param>
272+
/// <param name="axis">The axis.</param>
273+
/// <returns></returns>
274+
/// <exception cref="System.NotImplementedException">Only axis 0 is supported</exception>
275+
public static DenseTensor<float> Concatenate(this DenseTensor<float> tensor1, DenseTensor<float> tensor2, int axis = 0)
276+
{
277+
if (axis != 0)
278+
throw new NotImplementedException("Only axis 0 is supported");
279+
280+
var dimensions = tensor1.Dimensions.ToArray();
281+
dimensions[0] += tensor2.Dimensions[0];
282+
return CreateTensor(tensor1.Concat(tensor2).ToArray(), dimensions);
283+
}
284+
285+
286+
/// <summary>
287+
/// Repeats the specified Tensor along the 0 axis.
288+
/// </summary>
289+
/// <param name="tensor1">The tensor1.</param>
290+
/// <param name="count">The count.</param>
291+
/// <param name="axis">The axis.</param>
292+
/// <returns></returns>
293+
/// <exception cref="System.NotImplementedException">Only axis 0 is supported</exception>
294+
public static DenseTensor<float> Repeat(this DenseTensor<float> tensor1, int count, int axis = 0)
295+
{
296+
if (axis != 0)
297+
throw new NotImplementedException("Only axis 0 is supported");
298+
299+
var data = tensor1.ToArray();
300+
var dimensions = tensor1.Dimensions.ToArray();
301+
for (int i = 0; i < count; i++)
302+
{
303+
dimensions[0] += tensor1.Dimensions[0];
304+
data = data.Concat(tensor1).ToArray();
305+
}
306+
return CreateTensor(data, dimensions);
307+
}
308+
309+
292310
/// <summary>
293311
/// Generate a random Tensor from a normal distribution with mean 0 and variance 1
294312
/// </summary>

OnnxStack.StableDiffusion/Services/PromptService.cs

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
1-
using Microsoft.ML.OnnxRuntime.Tensors;
2-
using Microsoft.ML.OnnxRuntime;
3-
using OnnxStack.Core.Config;
1+
using Microsoft.ML.OnnxRuntime;
2+
using Microsoft.ML.OnnxRuntime.Tensors;
43
using OnnxStack.Core;
4+
using OnnxStack.Core.Config;
5+
using OnnxStack.Core.Services;
6+
using OnnxStack.StableDiffusion.Common;
7+
using OnnxStack.StableDiffusion.Config;
58
using OnnxStack.StableDiffusion.Helpers;
69
using System;
710
using System.Collections.Generic;
11+
using System.Collections.Immutable;
812
using System.Linq;
9-
using System.Text;
1013
using System.Threading.Tasks;
11-
using OnnxStack.Core.Services;
12-
using OnnxStack.StableDiffusion.Common;
13-
using System.Collections.Immutable;
1414

1515
namespace OnnxStack.StableDiffusion.Services
1616
{
@@ -35,28 +35,23 @@ public PromptService(IOnnxModelService onnxModelService)
3535
/// <param name="prompt">The prompt.</param>
3636
/// <param name="negativePrompt">The negative prompt.</param>
3737
/// <returns>Tensor containing all text embeds generated from the prompt and negative prompt</returns>
38-
public async Task<DenseTensor<float>> CreatePromptAsync(IModelOptions model, string prompt, string negativePrompt)
38+
public async Task<DenseTensor<float>> CreatePromptAsync(IModelOptions model, PromptOptions promptOptions, SchedulerOptions schedulerOptions)
3939
{
4040
// Tokenize Prompt and NegativePrompt
41-
var promptTokens = await DecodeTextAsync(model, prompt);
42-
var negativePromptTokens = await DecodeTextAsync(model, negativePrompt);
41+
var promptTokens = await DecodeTextAsync(model, promptOptions.Prompt);
42+
var negativePromptTokens = await DecodeTextAsync(model, promptOptions.NegativePrompt);
4343
var maxPromptTokenCount = Math.Max(promptTokens.Length, negativePromptTokens.Length);
4444

45-
Console.WriteLine($"Prompt - Length: {prompt.Length}, Tokens: {promptTokens.Length}");
46-
Console.WriteLine($"N-Prompt - Length: {negativePrompt?.Length}, Tokens: {negativePromptTokens.Length}");
47-
4845
// Generate embeds for tokens
4946
var promptEmbeddings = await GenerateEmbedsAsync(model, promptTokens, maxPromptTokenCount);
5047
var negativePromptEmbeddings = await GenerateEmbedsAsync(model, negativePromptTokens, maxPromptTokenCount);
5148

52-
// Calculate embeddings
53-
var textEmbeddings = new DenseTensor<float>(new[] { 2, promptEmbeddings.Count / model.EmbeddingsLength, model.EmbeddingsLength });
54-
for (var i = 0; i < promptEmbeddings.Count; i++)
55-
{
56-
textEmbeddings[0, i / model.EmbeddingsLength, i % model.EmbeddingsLength] = negativePromptEmbeddings[i];
57-
textEmbeddings[1, i / model.EmbeddingsLength, i % model.EmbeddingsLength] = promptEmbeddings[i];
58-
}
59-
return textEmbeddings;
49+
// If we are doing guided diffusion, concatenate the negative prompt embeddings
50+
// If not, we ignore the negative prompt embeddings
51+
if (schedulerOptions.GuidanceScale > 1)
52+
return negativePromptEmbeddings.Concatenate(promptEmbeddings);
53+
54+
return promptEmbeddings;
6055
}
6156

6257

@@ -111,7 +106,7 @@ public async Task<float[]> EncodeTokensAsync(IModelOptions model, int[] tokenize
111106
/// <param name="inputTokens">The input tokens.</param>
112107
/// <param name="minimumLength">The minimum length.</param>
113108
/// <returns></returns>
114-
private async Task<List<float>> GenerateEmbedsAsync(IModelOptions model, int[] inputTokens, int minimumLength)
109+
private async Task<DenseTensor<float>> GenerateEmbedsAsync(IModelOptions model, int[] inputTokens, int minimumLength)
115110
{
116111
// If less than minimumLength pad with blank tokens
117112
if (inputTokens.Length < minimumLength)
@@ -124,7 +119,9 @@ private async Task<List<float>> GenerateEmbedsAsync(IModelOptions model, int[] i
124119
var tokens = PadWithBlankTokens(tokenBatch, model.TokenizerLimit, model.BlankTokenValueArray);
125120
embeddings.AddRange(await EncodeTokensAsync(model, tokens.ToArray()));
126121
}
127-
return embeddings;
122+
123+
var dim = new[] { 1, embeddings.Count / model.EmbeddingsLength, model.EmbeddingsLength };
124+
return TensorHelper.CreateTensor(embeddings.ToArray(), dim);
128125
}
129126

130127

0 commit comments

Comments
 (0)