This repository was archived by the owner on Nov 27, 2024. It is now read-only.

Commit 5cb4ff3

Add some single input/output overloads for RunInference

1 parent ab48d4b

8 files changed: +97 -75 lines changed

OnnxStack.Core/Services/IOnnxModelService.cs

Lines changed: 18 additions & 9 deletions

@@ -61,32 +61,41 @@ public interface IOnnxModelService : IDisposable
 
 
         /// <summary>
-        /// Runs inference on the specified model.
+        /// Runs the inference Use when output size is unknown
         /// </summary>
+        /// <param name="model">The model.</param>
         /// <param name="modelType">Type of the model.</param>
-        /// <param name="inputs">The inputs.</param>
+        /// <param name="inputName">Name of the input.</param>
+        /// <param name="inputValue">The input value.</param>
+        /// <param name="outputName">Name of the output.</param>
+        /// <param name="outputValue">The output value.</param>
         /// <returns></returns>
-        IDisposableReadOnlyCollection<DisposableNamedOnnxValue> RunInference(IOnnxModel model, OnnxModelType modelType, IReadOnlyCollection<NamedOnnxValue> inputs);
+        IDisposableReadOnlyCollection<OrtValue> RunInference(IOnnxModel model, OnnxModelType modelType, string inputName, OrtValue inputValue, string outputName);
 
 
         /// <summary>
-        /// Runs inference on the specified model.asynchronously.
+        /// Runs the inference Use when output size is unknown
         /// </summary>
+        /// <param name="model">The model.</param>
         /// <param name="modelType">Type of the model.</param>
         /// <param name="inputs">The inputs.</param>
+        /// <param name="outputs">The outputs.</param>
         /// <returns></returns>
-        Task<IDisposableReadOnlyCollection<DisposableNamedOnnxValue>> RunInferenceAsync(IOnnxModel model, OnnxModelType modelType, IReadOnlyCollection<NamedOnnxValue> inputs);
+        IDisposableReadOnlyCollection<OrtValue> RunInference(IOnnxModel model, OnnxModelType modelType, Dictionary<string, OrtValue> inputs, IReadOnlyCollection<string> outputs);
 
 
         /// <summary>
-        /// Runs the inference Use when output size is unknown
+        /// Runs the inference asynchronously, Use when output size is known
+        /// Output buffer size must be known and set before inference is run
         /// </summary>
         /// <param name="model">The model.</param>
         /// <param name="modelType">Type of the model.</param>
-        /// <param name="inputs">The inputs.</param>
-        /// <param name="outputs">The outputs.</param>
+        /// <param name="inputName">Name of the input.</param>
+        /// <param name="inputValue">The input value.</param>
+        /// <param name="outputName">Name of the output.</param>
+        /// <param name="outputValue">The output value.</param>
         /// <returns></returns>
-        IReadOnlyCollection<OrtValue> RunInference(IOnnxModel model, OnnxModelType modelType, Dictionary<string, OrtValue> inputs, IReadOnlyCollection<string> outputs);
+        Task<IReadOnlyCollection<OrtValue>> RunInferenceAsync(IOnnxModel model, OnnxModelType modelType, string inputName, OrtValue inputValue, string outputName, OrtValue outputValue);
 
 
         /// <summary>
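
Usage sketch for the new single input/output overload (illustrative only: the EncodeSingle helper, its parameters, and the choice of VaeEncoder are assumptions, not part of this commit; the OnnxStack extension methods used are the ones visible in the diffuser diffs below):

    using System.Linq;
    using Microsoft.ML.OnnxRuntime;
    using Microsoft.ML.OnnxRuntime.Tensors;

    // Hypothetical call site: one named input, one named output; the service
    // builds the single-entry input/output collections internally, so the
    // per-call dictionary boilerplate disappears (output size unknown up front).
    public static DenseTensor<float> EncodeSingle(IOnnxModelService service, IOnnxModel model, DenseTensor<float> imageTensor)
    {
        var inputName = service.GetInputNames(model, OnnxModelType.VaeEncoder)[0];
        var outputName = service.GetOutputNames(model, OnnxModelType.VaeEncoder)[0];
        var inputMetaData = service.GetInputMetadata(model, OnnxModelType.VaeEncoder)[inputName];

        using (var inputValue = imageTensor.ToOrtValue(inputMetaData))  // OnnxStack extension, as used in the diffusers below
        using (var results = service.RunInference(model, OnnxModelType.VaeEncoder, inputName, inputValue, outputName))
        {
            return results.First().ToDenseTensor();                     // OnnxStack extension, as used in the diffusers below
        }
    }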

OnnxStack.Core/Services/OnnxModelService.cs

Lines changed: 52 additions & 34 deletions

@@ -4,7 +4,6 @@
 using System;
 using System.Collections.Concurrent;
 using System.Collections.Generic;
-using System.Linq;
 using System.Threading.Tasks;
 
 namespace OnnxStack.Core.Services
@@ -104,102 +103,119 @@ public Task<bool> IsEnabledAsync(IOnnxModel model, OnnxModelType modelType)
 
 
         /// <summary>
-        /// Runs inference on the specified model.
+        /// Runs the inference (Use when output size is unknown)
         /// </summary>
+        /// <param name="model">The model.</param>
         /// <param name="modelType">Type of the model.</param>
-        /// <param name="inputs">The inputs.</param>
+        /// <param name="inputName">Name of the input.</param>
+        /// <param name="inputValue">The input value.</param>
+        /// <param name="outputName">Name of the output.</param>
         /// <returns></returns>
-        public IDisposableReadOnlyCollection<DisposableNamedOnnxValue> RunInference(IOnnxModel model, OnnxModelType modelType, IReadOnlyCollection<NamedOnnxValue> inputs)
+        public IDisposableReadOnlyCollection<OrtValue> RunInference(IOnnxModel model, OnnxModelType modelType, string inputName, OrtValue inputValue, string outputName)
         {
-            return RunInternal(model, modelType, inputs);
+            var inputs = new Dictionary<string, OrtValue> { { inputName, inputValue } };
+            var outputs = new List<string> { outputName };
+            return RunInference(model, modelType, inputs, outputs);
         }
 
 
         /// <summary>
-        /// Runs inference on the specified model asynchronously(ish).
+        /// Runs the inference (Use when output size is unknown)
         /// </summary>
+        /// <param name="model">The model.</param>
         /// <param name="modelType">Type of the model.</param>
         /// <param name="inputs">The inputs.</param>
+        /// <param name="outputs">The outputs.</param>
         /// <returns></returns>
-        public async Task<IDisposableReadOnlyCollection<DisposableNamedOnnxValue>> RunInferenceAsync(IOnnxModel model, OnnxModelType modelType, IReadOnlyCollection<NamedOnnxValue> inputs)
+        public IDisposableReadOnlyCollection<OrtValue> RunInference(IOnnxModel model, OnnxModelType modelType, Dictionary<string, OrtValue> inputs, IReadOnlyCollection<string> outputs)
         {
-            return await Task.Run(() => RunInternal(model, modelType, inputs)).ConfigureAwait(false);
+            return GetModelSet(model)
+                .GetSession(modelType)
+                .Run(new RunOptions(), inputs, outputs);
         }
 
 
         /// <summary>
-        /// Gets the input metadata.
+        /// Runs the inference asynchronously, (Use when output size is known)
+        /// Output buffer size must be known and set before inference is run
         /// </summary>
+        /// <param name="model">The model.</param>
         /// <param name="modelType">Type of the model.</param>
+        /// <param name="inputName">Name of the input.</param>
+        /// <param name="inputValue">The input value.</param>
+        /// <param name="outputName">Name of the output.</param>
+        /// <param name="outputValue">The output value.</param>
         /// <returns></returns>
-        /// <exception cref="System.NotImplementedException"></exception>
-        public IReadOnlyDictionary<string, NodeMetadata> GetInputMetadata(IOnnxModel model, OnnxModelType modelType)
+        public Task<IReadOnlyCollection<OrtValue>> RunInferenceAsync(IOnnxModel model, OnnxModelType modelType, string inputName, OrtValue inputValue, string outputName, OrtValue outputValue)
         {
-            return InputMetadataInternal(model, modelType);
+            var inputs = new Dictionary<string, OrtValue> { { inputName, inputValue } };
+            var outputs = new Dictionary<string, OrtValue> { { outputName, outputValue } };
+            return RunInferenceAsync(model, modelType, inputs, outputs);
         }
 
 
         /// <summary>
-        /// Gets the input names.
+        /// Runs the inference asynchronously, (Use when output size is known)
+        /// Output buffer size must be known and set before inference is run
         /// </summary>
+        /// <param name="model">The model.</param>
         /// <param name="modelType">Type of the model.</param>
+        /// <param name="inputs">The inputs.</param>
+        /// <param name="outputs">The outputs.</param>
         /// <returns></returns>
-        /// <exception cref="System.NotImplementedException"></exception>
-        public IReadOnlyList<string> GetInputNames(IOnnxModel model, OnnxModelType modelType)
+        public Task<IReadOnlyCollection<OrtValue>> RunInferenceAsync(IOnnxModel model, OnnxModelType modelType, Dictionary<string, OrtValue> inputs, Dictionary<string, OrtValue> outputs)
         {
-            return InputNamesInternal(model, modelType);
+            return GetModelSet(model)
+                .GetSession(modelType)
+                .RunAsync(new RunOptions(), inputs.Keys, inputs.Values, outputs.Keys, outputs.Values);
         }
 
 
         /// <summary>
-        /// Gets the output metadata.
+        /// Gets the input metadata.
         /// </summary>
         /// <param name="modelType">Type of the model.</param>
         /// <returns></returns>
         /// <exception cref="System.NotImplementedException"></exception>
-        public IReadOnlyDictionary<string, NodeMetadata> GetOutputMetadata(IOnnxModel model, OnnxModelType modelType)
+        public IReadOnlyDictionary<string, NodeMetadata> GetInputMetadata(IOnnxModel model, OnnxModelType modelType)
         {
-            return OutputMetadataInternal(model, modelType);
+            return InputMetadataInternal(model, modelType);
         }
 
 
         /// <summary>
-        /// Gets the output names.
+        /// Gets the input names.
         /// </summary>
         /// <param name="modelType">Type of the model.</param>
         /// <returns></returns>
         /// <exception cref="System.NotImplementedException"></exception>
-        public IReadOnlyList<string> GetOutputNames(IOnnxModel model, OnnxModelType modelType)
+        public IReadOnlyList<string> GetInputNames(IOnnxModel model, OnnxModelType modelType)
        {
-            return OutputNamesInternal(model, modelType);
+            return InputNamesInternal(model, modelType);
        }
 
 
         /// <summary>
-        /// Runs inference on the specified model.
+        /// Gets the output metadata.
         /// </summary>
         /// <param name="modelType">Type of the model.</param>
-        /// <param name="inputs">The inputs.</param>
         /// <returns></returns>
-        public IReadOnlyCollection<OrtValue> RunInference(IOnnxModel model, OnnxModelType modelType, Dictionary<string, OrtValue> inputs, IReadOnlyCollection<string> outputs)
+        /// <exception cref="System.NotImplementedException"></exception>
+        public IReadOnlyDictionary<string, NodeMetadata> GetOutputMetadata(IOnnxModel model, OnnxModelType modelType)
        {
-            return GetModelSet(model)
-                .GetSession(modelType)
-                .Run(new RunOptions(), inputs, outputs);
+            return OutputMetadataInternal(model, modelType);
        }
 
 
         /// <summary>
-        /// Runs inference on the specified model.
+        /// Gets the output names.
         /// </summary>
         /// <param name="modelType">Type of the model.</param>
-        /// <param name="inputs">The inputs.</param>
         /// <returns></returns>
-        public Task<IReadOnlyCollection<OrtValue>> RunInferenceAsync(IOnnxModel model, OnnxModelType modelType, Dictionary<string, OrtValue> inputs, Dictionary<string, OrtValue> outputs)
+        /// <exception cref="System.NotImplementedException"></exception>
+        public IReadOnlyList<string> GetOutputNames(IOnnxModel model, OnnxModelType modelType)
        {
-            return GetModelSet(model)
-                .GetSession(modelType)
-                .RunAsync(new RunOptions(), inputs.Keys, inputs.Values, outputs.Keys, outputs.Values);
+            return OutputNamesInternal(model, modelType);
        }
 
 
@@ -334,5 +350,7 @@ public void Dispose()
                 onnxModelSet?.Dispose();
             }
         }
+
+
     }
 }
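
Corresponding sketch for the fixed-size asynchronous path (illustrative only: the DecodeSingleAsync helper and its int[] dimension parameter are assumptions, not part of this commit), mirroring the pattern the diffuser call sites below adopt: look up output metadata, pre-allocate the output buffer, then bind both values by name:

    using System.Linq;
    using System.Threading.Tasks;
    using Microsoft.ML.OnnxRuntime;
    using Microsoft.ML.OnnxRuntime.Tensors;

    // Hypothetical call site: the output shape is known up front, so the caller
    // owns both buffers; the service wraps them in single-entry dictionaries and
    // forwards to the dictionary-based RunInferenceAsync overload above.
    public static async Task<DenseTensor<float>> DecodeSingleAsync(IOnnxModelService service, IOnnxModel model, DenseTensor<float> latents, int[] outputDimension)
    {
        var inputName = service.GetInputNames(model, OnnxModelType.VaeDecoder)[0];
        var outputName = service.GetOutputNames(model, OnnxModelType.VaeDecoder)[0];
        var outputMetaData = service.GetOutputMetadata(model, OnnxModelType.VaeDecoder)[outputName];

        using (var inputValue = latents.ToOrtValue(outputMetaData))                   // OnnxStack extension, as used in the diffusers below
        using (var outputValue = outputMetaData.CreateOutputBuffer(outputDimension))  // OnnxStack extension, as used in the diffusers below
        {
            var results = await service.RunInferenceAsync(model, OnnxModelType.VaeDecoder, inputName, inputValue, outputName, outputValue);
            using (results.First())
            {
                // The caller-owned output buffer now holds the inference result.
                return outputValue.ToDenseTensor();
            }
        }
    }

Binding a pre-allocated output buffer lets the caller reuse and dispose it deterministically rather than receiving a fresh allocation per run; the diffuser call sites below all follow this pattern.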

OnnxStack.StableDiffusion/Diffusers/DiffuserBase.cs

Lines changed: 1 addition & 3 deletions

@@ -222,9 +222,7 @@ protected virtual async Task<DenseTensor<float>> DecodeLatentsAsync(IModelOption
             using (var inputTensorValue = latents.ToOrtValue(outputTensorMetaData))
             using (var outputTensorValue = outputTensorMetaData.CreateOutputBuffer(outputDim))
             {
-                var inputs = new Dictionary<string, OrtValue> { { inputNames[0], inputTensorValue } };
-                var outputs = new Dictionary<string, OrtValue> { { outputNames[0], outputTensorValue } };
-                var results = await _onnxModelService.RunInferenceAsync(model, OnnxModelType.VaeDecoder, inputs, outputs);
+                var results = await _onnxModelService.RunInferenceAsync(model, OnnxModelType.VaeDecoder, inputNames[0], inputTensorValue, outputNames[0], outputTensorValue);
                 using (var imageResult = results.First())
                 {
                     _logger?.LogEnd("Latents decoded", timestamp);

OnnxStack.StableDiffusion/Diffusers/LatentConsistency/ImageDiffuser.cs

Lines changed: 1 addition & 3 deletions

@@ -69,9 +69,7 @@ protected override async Task<DenseTensor<float>> PrepareLatentsAsync(IModelOpti
             using (var inputTensorValue = imageTensor.ToOrtValue(outputTensorMetaData))
             using (var outputTensorValue = outputTensorMetaData.CreateOutputBuffer(outputDimension))
             {
-                var inputs = new Dictionary<string, OrtValue> { { inputNames[0], inputTensorValue } };
-                var outputs = new Dictionary<string, OrtValue> { { outputNames[0], outputTensorValue } };
-                var results = await _onnxModelService.RunInferenceAsync(model, OnnxModelType.VaeEncoder, inputs, outputs);
+                var results = await _onnxModelService.RunInferenceAsync(model, OnnxModelType.VaeEncoder, inputNames[0], inputTensorValue, outputNames[0], outputTensorValue);
                 using (var result = results.First())
                 {
                     var outputResult = outputTensorValue.ToDenseTensor();

OnnxStack.StableDiffusion/Diffusers/StableDiffusion/ImageDiffuser.cs

Lines changed: 1 addition & 3 deletions

@@ -71,9 +71,7 @@ protected override async Task<DenseTensor<float>> PrepareLatentsAsync(IModelOpti
             using (var inputTensorValue = imageTensor.ToOrtValue(outputTensorMetaData))
             using (var outputTensorValue = outputTensorMetaData.CreateOutputBuffer(outputDimension))
             {
-                var inputs = new Dictionary<string, OrtValue> { { inputNames[0], inputTensorValue } };
-                var outputs = new Dictionary<string, OrtValue> { { outputNames[0], outputTensorValue } };
-                var results = await _onnxModelService.RunInferenceAsync(model, OnnxModelType.VaeEncoder, inputs, outputs);
+                var results = await _onnxModelService.RunInferenceAsync(model, OnnxModelType.VaeEncoder, inputNames[0], inputTensorValue, outputNames[0], outputTensorValue);
                 using (var result = results.First())
                 {
                     var outputResult = outputTensorValue.ToDenseTensor();

OnnxStack.StableDiffusion/Diffusers/StableDiffusion/InpaintDiffuser.cs

Lines changed: 19 additions & 9 deletions

@@ -14,6 +14,7 @@
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Linq;
+using System.Reflection;
 using System.Threading;
 using System.Threading.Tasks;
 
@@ -66,7 +67,7 @@ protected override async Task<DenseTensor<float>> SchedulerStepAsync(IModelOptio
             var maskImage = PrepareMask(modelOptions, promptOptions, schedulerOptions);
 
             // Create Masked Image Latents
-            var maskedImage = PrepareImageMask(modelOptions, promptOptions, schedulerOptions);
+            var maskedImage = await PrepareImageMask(modelOptions, promptOptions, schedulerOptions);
 
             // Get Model metadata
             var inputNames = _onnxModelService.GetInputNames(modelOptions, OnnxModelType.Unet);
@@ -179,7 +180,7 @@ private DenseTensor<float> PrepareMask(IModelOptions modelOptions, PromptOptions
         /// <param name="schedulerOptions">The scheduler options.</param>
         /// <param name="scheduler">The scheduler.</param>
         /// <returns></returns>
-        private DenseTensor<float> PrepareImageMask(IModelOptions modelOptions, PromptOptions promptOptions, SchedulerOptions schedulerOptions)
+        private async Task<DenseTensor<float>> PrepareImageMask(IModelOptions modelOptions, PromptOptions promptOptions, SchedulerOptions schedulerOptions)
         {
             using (var image = promptOptions.InputImage.ToImage())
             using (var mask = promptOptions.InputImageMask.ToImage())
@@ -227,15 +228,24 @@ private DenseTensor<float> PrepareImageMask(IModelOptions modelOptions, PromptOp
 
                 // Encode the image
                 var inputNames = _onnxModelService.GetInputNames(modelOptions, OnnxModelType.VaeEncoder);
-                var inputParameters = CreateInputParameters(NamedOnnxValue.CreateFromTensor(inputNames[0], imageMaskedTensor));
-                using (var inferResult = _onnxModelService.RunInference(modelOptions, OnnxModelType.VaeEncoder, inputParameters))
+                var outputNames = _onnxModelService.GetOutputNames(modelOptions, OnnxModelType.VaeEncoder);
+                var outputMetaData = _onnxModelService.GetOutputMetadata(modelOptions, OnnxModelType.VaeEncoder);
+                var outputTensorMetaData = outputMetaData[outputNames[0]];
+
+                var outputDimension = schedulerOptions.GetScaledDimension();
+                using (var inputTensorValue = imageTensor.ToOrtValue(outputTensorMetaData))
+                using (var outputTensorValue = outputTensorMetaData.CreateOutputBuffer(outputDimension))
                 {
-                    var sample = inferResult.FirstElementAs<DenseTensor<float>>();
-                    var scaledSample = sample.MultiplyBy(modelOptions.ScaleFactor);
-                    if (schedulerOptions.GuidanceScale > 1f)
-                        scaledSample = scaledSample.Repeat(2);
+                    var results = await _onnxModelService.RunInferenceAsync(modelOptions, OnnxModelType.VaeEncoder, inputNames[0], inputTensorValue, outputNames[0], outputTensorValue);
+                    using (var result = results.First())
+                    {
+                        var sample = outputTensorValue.ToDenseTensor();
+                        var scaledSample = sample.MultiplyBy(modelOptions.ScaleFactor);
+                        if (schedulerOptions.GuidanceScale > 1f)
+                            scaledSample = scaledSample.Repeat(2);
 
-                    return scaledSample;
+                        return scaledSample;
+                    }
                 }
             }
         }

OnnxStack.StableDiffusion/Diffusers/StableDiffusion/InpaintLegacyDiffuser.cs

Lines changed: 1 addition & 3 deletions

@@ -169,9 +169,7 @@ protected override async Task<DenseTensor<float>> PrepareLatentsAsync(IModelOpti
             using (var inputTensorValue = imageTensor.ToOrtValue(outputBufferMetaData))
             using (var outputTensorValue = outputBufferMetaData.CreateOutputBuffer(outputDimensions))
             {
-                var inputs = new Dictionary<string, OrtValue> { { inputNames[0], inputTensorValue } };
-                var outputs = new Dictionary<string, OrtValue> { { outputNames[0], outputTensorValue } };
-                var results = await _onnxModelService.RunInferenceAsync(model, OnnxModelType.VaeEncoder, inputs, outputs);
+                var results = await _onnxModelService.RunInferenceAsync(model, OnnxModelType.VaeEncoder, inputNames[0], inputTensorValue, outputNames[0], outputTensorValue);
                 using (var result = results.First())
                 {
                     var outputResult = outputTensorValue.ToDenseTensor();
