Process batch images one by one to lower VRAM usage

saddam213 · saddam213 · commit 8119c0156ee4 · 2023-10-28T22:27:48.000+13:00
diff --git a/OnnxStack.StableDiffusion/Diffusers/DiffuserBase.cs b/OnnxStack.StableDiffusion/Diffusers/DiffuserBase.cs
@@ -113,7 +113,7 @@ public virtual async Task<DenseTensor<float>> DiffuseAsync(IModelOptions modelOp
                 }
 
                 // Decode Latents
-                return await DecodeLatents(modelOptions, schedulerOptions, latents);
+                return await DecodeLatents(modelOptions, promptOptions, schedulerOptions, latents);
             }
         }
 
@@ -123,26 +123,42 @@ public virtual async Task<DenseTensor<float>> DiffuseAsync(IModelOptions modelOp
         /// <param name="options">The options.</param>
         /// <param name="latents">The latents.</param>
         /// <returns></returns>
-        protected async Task<DenseTensor<float>> DecodeLatents(IModelOptions model, SchedulerOptions options, DenseTensor<float> latents)
+        protected async Task<DenseTensor<float>> DecodeLatents(IModelOptions model, PromptOptions prompt, SchedulerOptions options, DenseTensor<float> latents)
         {
             // Scale and decode the image latents with vae.
             latents = latents.MultiplyBy(1.0f / model.ScaleFactor);
 
-            var inputNames = _onnxModelService.GetInputNames(model, OnnxModelType.VaeDecoder);
-            var inputParameters = CreateInputParameters(NamedOnnxValue.CreateFromTensor(inputNames[0], latents));
-
-            // Run inference.
-            using (var inferResult = await _onnxModelService.RunInferenceAsync(model, OnnxModelType.VaeDecoder, inputParameters))
+            var images = prompt.BatchCount > 1 
+                ? latents.Split(prompt.BatchCount) 
+                : new[] { latents };
+            var imageTensors = new List<DenseTensor<float>>();
+            foreach (var image in images)
             {
-                var resultTensor = inferResult.FirstElementAs<DenseTensor<float>>();
-                if (await _onnxModelService.IsEnabledAsync(model, OnnxModelType.SafetyChecker))
+                var inputNames = _onnxModelService.GetInputNames(model, OnnxModelType.VaeDecoder);
+                var inputParameters = CreateInputParameters(NamedOnnxValue.CreateFromTensor(inputNames[0], image));
+
+                // Run the VAE decoder on this single image only (one inference call per batch item).
+                using (var inferResult = await _onnxModelService.RunInferenceAsync(model, OnnxModelType.VaeDecoder, inputParameters))
                 {
-                    // Check if image contains NSFW content, 
-                    if (!await IsImageSafe(model, options, resultTensor))
-                        return resultTensor.CloneEmpty().ToDenseTensor(); //TODO: blank image?, exception?, null?
+                    var resultTensor = inferResult.FirstElementAs<DenseTensor<float>>();
+                    if (await _onnxModelService.IsEnabledAsync(model, OnnxModelType.SafetyChecker))
+                    {
+                        // Check if the decoded image contains NSFW content; only this image is replaced, the rest of the batch continues.
+                        if (!await IsImageSafe(model, options, resultTensor))
+                        {
+                            //TODO: blank image?, exception?, null? For now the CloneEmpty() result stands in for the unsafe image.
+                            imageTensors.Add(resultTensor.CloneEmpty().ToDenseTensor());
+                            continue;
+                        }
+                    }
+
+                    if (prompt.BatchCount == 1)
+                        return resultTensor.ToDenseTensor();
+
+                    imageTensors.Add(resultTensor.ToDenseTensor());
                 }
-                return resultTensor.ToDenseTensor();
             }
+            return imageTensors.Join();
         }
 
 
diff --git a/OnnxStack.StableDiffusion/Diffusers/InpaintDiffuser.cs b/OnnxStack.StableDiffusion/Diffusers/InpaintDiffuser.cs
@@ -94,7 +94,7 @@ public override async Task<DenseTensor<float>> DiffuseAsync(IModelOptions modelO
                 }
 
                 // Decode Latents
-                return await DecodeLatents(modelOptions, schedulerOptions, latents);
+                return await DecodeLatents(modelOptions, promptOptions, schedulerOptions, latents);
             }
         }
 
diff --git a/OnnxStack.StableDiffusion/Diffusers/InpaintLegacyDiffuser.cs b/OnnxStack.StableDiffusion/Diffusers/InpaintLegacyDiffuser.cs
@@ -95,7 +95,7 @@ public override async Task<DenseTensor<float>> DiffuseAsync(IModelOptions modelO
                 }
 
                 // Decode Latents
-                return await DecodeLatents(modelOptions, schedulerOptions, latents);
+                return await DecodeLatents(modelOptions, promptOptions, schedulerOptions, latents);
             }
         }
 
diff --git a/OnnxStack.StableDiffusion/Helpers/TensorHelper.cs b/OnnxStack.StableDiffusion/Helpers/TensorHelper.cs
@@ -1,5 +1,6 @@
 ﻿using Microsoft.ML.OnnxRuntime.Tensors;
 using System;
+using System.Collections.Generic;
 using System.Linq;
 
 namespace OnnxStack.StableDiffusion.Helpers
@@ -366,6 +367,33 @@ public static DenseTensor<float>[] Split(this DenseTensor<float> tensor, int cou
         }
 
 
+        /// <summary>
+        /// Joins the tensors along axis 0 into one new tensor; every tensor is assumed to share the first tensor's shape.
+        /// </summary>
+        /// <param name="tensors">The tensors to join, in output order; must contain at least one tensor.</param>
+        /// <param name="axis">The axis to join along; only 0 is currently supported.</param>
+        /// <returns>A new tensor with the first tensor's dimensions, except dimension 0 which is multiplied by the tensor count.</returns>
+        /// <exception cref="System.NotImplementedException">Only axis 0 is supported</exception>
+        public static DenseTensor<float> Join(this IList<DenseTensor<float>> tensors, int axis = 0)
+        {
+            if (axis != 0)
+                throw new NotImplementedException("Only axis 0 is supported");
+
+            var first = tensors.First(); // throws InvalidOperationException when the list is empty
+            var dimensions = first.Dimensions.ToArray();
+            dimensions[0] *= tensors.Count;
+
+            var tensorLength = (int)first.Length; // element count of a single tensor, used as the copy stride
+            var buffer = new float[tensorLength * tensors.Count].AsMemory();
+            for (int i = 0; i < tensors.Count; i++) // Count property: no LINQ Count() call per iteration
+            {
+                var start = i * tensorLength;
+                tensors[i].Buffer.CopyTo(buffer[start..]);
+            }
+            return new DenseTensor<float>(buffer, dimensions);
+        }
+
+
         /// <summary>
         /// Adds the tensors.
         /// </summary>

Original file line number	Diff line number	Diff line change
`@@ -94,7 +94,7 @@ public override async Task<DenseTensor<float>> DiffuseAsync(IModelOptions modelO`
`94`	`94`	`}`
`95`	`95`
`96`	`96`	`// Decode Latents`
`97`		`- return await DecodeLatents(modelOptions, schedulerOptions, latents);`
	`97`	`+ return await DecodeLatents(modelOptions, promptOptions, schedulerOptions, latents);`
`98`	`98`	`}`
`99`	`99`	`}`
`100`	`100`
Original file line number	Diff line number	Diff line change
`@@ -95,7 +95,7 @@ public override async Task<DenseTensor<float>> DiffuseAsync(IModelOptions modelO`
`95`	`95`	`}`
`96`	`96`
`97`	`97`	`// Decode Latents`
`98`		`- return await DecodeLatents(modelOptions, schedulerOptions, latents);`
	`98`	`+ return await DecodeLatents(modelOptions, promptOptions, schedulerOptions, latents);`
`99`	`99`	`}`
`100`	`100`	`}`
`101`	`101`