 using Microsoft.Extensions.Logging;
-using Microsoft.ML.OnnxRuntime;
 using Microsoft.ML.OnnxRuntime.Tensors;
+using OnnxStack.Core;
 using OnnxStack.Core.Config;
+using OnnxStack.Core.Model;
 using OnnxStack.Core.Services;
 using OnnxStack.StableDiffusion.Common;
 using OnnxStack.StableDiffusion.Config;
 using OnnxStack.StableDiffusion.Enums;
 using OnnxStack.StableDiffusion.Helpers;
 using SixLabors.ImageSharp;
+using SixLabors.ImageSharp.Processing;
 using System;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
 
 namespace OnnxStack.StableDiffusion.Diffusers.LatentConsistency
 {
@@ -40,7 +45,7 @@ public InpaintLegacyDiffuser(IOnnxModelService onnxModelService, IPromptService
         /// <param name="options">The options.</param>
         /// <param name="scheduler">The scheduler.</param>
         /// <returns></returns>
-        protected override IReadOnlyList<int> GetTimesteps(PromptOptions prompt, SchedulerOptions options, IScheduler scheduler)
+        protected override IReadOnlyList<int> GetTimesteps(SchedulerOptions options, IScheduler scheduler)
         {
             // Image2Image: narrow the timestep range by the Strength
             var inittimestep = Math.Min((int)(options.InferenceSteps * options.Strength), options.InferenceSteps);
@@ -50,31 +55,202 @@ protected override IReadOnlyList<int> GetTimesteps(PromptOptions prompt, Schedul
 
 
         /// <summary>
-        /// Prepares the latents for inference.
+        /// Runs the scheduler steps.
         /// </summary>
+        /// <param name="modelOptions">The model options.</param>
+        /// <param name="promptOptions">The prompt options.</param>
+        /// <param name="schedulerOptions">The scheduler options.</param>
+        /// <param name="promptEmbeddings">The prompt embeddings.</param>
+        /// <param name="performGuidance">if set to <c>true</c> [perform guidance].</param>
+        /// <param name="progressCallback">The progress callback.</param>
+        /// <param name="cancellationToken">The cancellation token.</param>
+        /// <returns></returns>
+        protected override async Task<DenseTensor<float>> SchedulerStepAsync(IModelOptions modelOptions, PromptOptions promptOptions, SchedulerOptions schedulerOptions, DenseTensor<float> promptEmbeddings, bool performGuidance, Action<int, int> progressCallback = null, CancellationToken cancellationToken = default)
+        {
+            using (var scheduler = GetScheduler(schedulerOptions))
+            {
+                // Get timesteps
+                var timesteps = GetTimesteps(schedulerOptions, scheduler);
+
+                // Create latent sample
+                var latentsOriginal = await PrepareLatentsAsync(modelOptions, promptOptions, schedulerOptions, scheduler, timesteps);
+
+                // Create masks sample
+                var maskImage = PrepareMask(modelOptions, promptOptions, schedulerOptions);
+
+                // Generate some noise
+                var noise = scheduler.CreateRandomSample(latentsOriginal.Dimensions);
+
+                // Add noise to original latent
+                var latents = scheduler.AddNoise(latentsOriginal, noise, timesteps);
+
+                // Get Model metadata
+                var metadata = _onnxModelService.GetModelMetadata(modelOptions, OnnxModelType.Unet);
+
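+                // LCM conditions the UNet on the guidance scale through an embedding,
+                // rather than duplicating the batch for classifier-free guidance.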
+                // Get Guidance Scale Embedding
+                var guidanceEmbeddings = GetGuidanceScaleEmbedding(schedulerOptions.GuidanceScale);
+
+                // Denoised result
+                DenseTensor<float> denoised = null;
+
+                // Loop through the timesteps
+                var step = 0;
+                foreach (var timestep in timesteps)
+                {
+                    step++;
+                    var stepTime = Stopwatch.GetTimestamp();
+                    cancellationToken.ThrowIfCancellationRequested();
+
+                    // Create input tensor.
+                    var inputTensor = scheduler.ScaleInput(latents, timestep);
+                    var timestepTensor = CreateTimestepTensor(timestep);
+
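+                    // Output buffer for the UNet noise prediction, sized to the scaled latent dimensions.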
+                    var outputChannels = 1;
+                    var outputDimension = schedulerOptions.GetScaledDimension(outputChannels);
+                    using (var inferenceParameters = new OnnxInferenceParameters(metadata))
+                    {
+                        inferenceParameters.AddInputTensor(inputTensor);
+                        inferenceParameters.AddInputTensor(timestepTensor);
+                        inferenceParameters.AddInputTensor(promptEmbeddings);
+                        inferenceParameters.AddInputTensor(guidanceEmbeddings);
+                        inferenceParameters.AddOutputBuffer(outputDimension);
+
+                        var results = await _onnxModelService.RunInferenceAsync(modelOptions, OnnxModelType.Unet, inferenceParameters);
+                        using (var result = results.First())
+                        {
+                            var noisePred = result.ToDenseTensor();
+
+                            // Scheduler Step
+                            var schedulerResult = scheduler.Step(noisePred, timestep, latents);
+
+                            latents = schedulerResult.Result;
+                            denoised = schedulerResult.SampleData;
+
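+                            // Legacy inpaint: re-noise the original latents to the upcoming timestep and
+                            // blend them with the denoised latents via the mask, so the preserved region
+                            // keeps tracking the source image at every step.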
+                            // Add noise to original latent
+                            if (step < timesteps.Count - 1)
+                            {
+                                var noiseTimestep = timesteps[step + 1];
+                                var initLatentsProper = scheduler.AddNoise(latentsOriginal, noise, new[] { noiseTimestep });
+
+                                // Apply mask and combine
+                                latents = ApplyMaskedLatents(schedulerResult.Result, initLatentsProper, maskImage);
+                            }
+                        }
+                    }
+
+                    progressCallback?.Invoke(step, timesteps.Count);
+                    _logger?.LogEnd($"Step {step}/{timesteps.Count}", stepTime);
+                }
+
+                // Decode Latents
+                return await DecodeLatentsAsync(modelOptions, promptOptions, schedulerOptions, denoised);
+            }
+        }
+
+
+        /// <summary>
+        /// Prepares the input latents for inference.
+        /// </summary>
+        /// <param name="model">The model.</param>
         /// <param name="prompt">The prompt.</param>
         /// <param name="options">The options.</param>
         /// <param name="scheduler">The scheduler.</param>
+        /// <param name="timesteps">The timesteps.</param>
         /// <returns></returns>
-        protected override DenseTensor<float> PrepareLatents(IModelOptions model, PromptOptions prompt, SchedulerOptions options, IScheduler scheduler, IReadOnlyList<int> timesteps)
+        protected override async Task<DenseTensor<float>> PrepareLatentsAsync(IModelOptions model, PromptOptions prompt, SchedulerOptions options, IScheduler scheduler, IReadOnlyList<int> timesteps)
         {
             // Image input: encode with the VAE, add noise, return as latent 0
             var imageTensor = prompt.InputImage.ToDenseTensor(new[] { 1, 3, options.Height, options.Width });
-            var inputNames = _onnxModelService.GetInputNames(model, OnnxModelType.VaeEncoder);
-            var inputParameters = CreateInputParameters(NamedOnnxValue.CreateFromTensor(inputNames[0], imageTensor));
-            using (var inferResult = _onnxModelService.RunInference(model, OnnxModelType.VaeEncoder, inputParameters))
+
+            //TODO: Model Config, Channels
+            var outputDimensions = options.GetScaledDimension();
+            var metadata = _onnxModelService.GetModelMetadata(model, OnnxModelType.VaeEncoder);
+            using (var inferenceParameters = new OnnxInferenceParameters(metadata))
             {
-                var sample = inferResult.FirstElementAs<DenseTensor<float>>();
-                var scaledSample = sample
-                    .Add(scheduler.CreateRandomSample(sample.Dimensions, options.InitialNoiseLevel))
-                    .MultiplyBy(model.ScaleFactor);
+                inferenceParameters.AddInputTensor(imageTensor);
+                inferenceParameters.AddOutputBuffer(outputDimensions);
+
+                var results = await _onnxModelService.RunInferenceAsync(model, OnnxModelType.VaeEncoder, inferenceParameters);
+                using (var result = results.First())
+                {
+                    var outputResult = result.ToDenseTensor();
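+                    // Add the configured initial noise, then scale by the model's VAE scale factor.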
+                    var scaledSample = outputResult
+                        .Add(scheduler.CreateRandomSample(outputDimensions, options.InitialNoiseLevel))
+                        .MultiplyBy(model.ScaleFactor);
+
+                    return scaledSample;
+                }
+            }
+        }
+
 
-                var noisySample = scheduler.AddNoise(scaledSample, scheduler.CreateRandomSample(scaledSample.Dimensions), timesteps);
-                if (prompt.BatchCount > 1)
-                    return noisySample.Repeat(prompt.BatchCount);
+        /// <summary>
+        /// Prepares the mask.
+        /// </summary>
+        /// <param name="modelOptions">The model options.</param>
+        /// <param name="promptOptions">The prompt options.</param>
+        /// <param name="schedulerOptions">The scheduler options.</param>
+        /// <returns></returns>
+        private DenseTensor<float> PrepareMask(IModelOptions modelOptions, PromptOptions promptOptions, SchedulerOptions schedulerOptions)
+        {
+            using (var mask = promptOptions.InputImageMask.ToImage())
+            {
+                // Prepare the mask
+                int width = schedulerOptions.GetScaledWidth();
+                int height = schedulerOptions.GetScaledHeight();
+                mask.Mutate(x => x.Grayscale());
+                mask.Mutate(x => x.Resize(new Size(width, height), KnownResamplers.NearestNeighbor, true));
+                var maskTensor = new DenseTensor<float>(new[] { 1, 4, height, width });
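+                // The alpha channel is normalised to [0,1] and inverted, so a value of 1 marks pixels
+                // that keep the original (re-noised) latents; only channel 0 carries data, the other
+                // channels exist to match the 4-channel latent shape.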
+                mask.ProcessPixelRows(img =>
+                {
+                    for (int x = 0; x < width; x++)
+                    {
+                        for (int y = 0; y < height; y++)
+                        {
+                            var pixelSpan = img.GetRowSpan(y);
+                            var value = pixelSpan[x].A / 255.0f;
+                            maskTensor[0, 0, y, x] = 1f - value;
+                            maskTensor[0, 1, y, x] = 0f; // Needed for shape only
+                            maskTensor[0, 2, y, x] = 0f; // Needed for shape only
+                            maskTensor[0, 3, y, x] = 0f; // Needed for shape only
+                        }
+                    }
+                });
+
+                return maskTensor;
+            }
+        }
+
+
+        /// <summary>
+        /// Applies the masked latents.
+        /// </summary>
+        /// <param name="latents">The latents.</param>
+        /// <param name="initLatentsProper">The re-noised original latents.</param>
+        /// <param name="mask">The mask.</param>
+        /// <returns></returns>
+        private DenseTensor<float> ApplyMaskedLatents(DenseTensor<float> latents, DenseTensor<float> initLatentsProper, DenseTensor<float> mask)
+        {
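+            // Element-wise blend: initLatentsProper * mask + latents * (1 - mask).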
+            var result = new DenseTensor<float>(latents.Dimensions);
+            for (int batch = 0; batch < latents.Dimensions[0]; batch++)
+            {
+                for (int channel = 0; channel < latents.Dimensions[1]; channel++)
+                {
+                    for (int height = 0; height < latents.Dimensions[2]; height++)
+                    {
+                        for (int width = 0; width < latents.Dimensions[3]; width++)
+                        {
+                            float maskValue = mask[batch, 0, height, width];
+                            float latentsValue = latents[batch, channel, height, width];
+                            float initLatentsProperValue = initLatentsProper[batch, channel, height, width];
 
-                return noisySample;
+                            // Keep the re-noised original latent where the mask is 1, the denoised latent where it is 0
+                            float newValue = initLatentsProperValue * maskValue + latentsValue * (1f - maskValue);
+                            result[batch, channel, height, width] = newValue;
+                        }
+                    }
+                }
             }
+            return result;
         }
     }
 }