This repository was archived by the owner on Nov 27, 2024. It is now read-only.

Commit d54393e

WIP on (no branch): 5da385d Fix up Tokenizer/TextEncoder inputs

2 parents 5da385d + c3743a0

File tree: 4 files changed, +29 −74 lines

  OnnxStack.Console/appsettings.json
  OnnxStack.StableDiffusion/Pipelines/StableCascadePipeline.cs
  OnnxStack.StableDiffusion/Pipelines/StableDiffusionPipeline.cs
  OnnxStack.StableDiffusion/Pipelines/StableDiffusionXLPipeline.cs

OnnxStack.Console/appsettings.json

Lines changed: 18 additions & 18 deletions
@@ -25,29 +25,29 @@
       "ExecutionProvider": "DirectML",
       "SchedulerOptions": {
         "InferenceSteps": 22,
-        "GuidanceScale": 8
+        "GuidanceScale": 8
       },
       "TokenizerConfig": {
         "PadTokenId": 49407,
         "BlankTokenId": 49407,
         "TokenizerLimit": 77,
         "TokenizerLength": 768,
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5\\cliptokenizer.onnx"
+        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5-onnx\\cliptokenizer.onnx"
       },
       "TextEncoderConfig": {
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5\\text_encoder\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5-onnx\\text_encoder\\model.onnx"
       },
       "UnetConfig": {
         "ModelType": "Base",
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5\\unet\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5-onnx\\unet\\model.onnx"
       },
       "VaeDecoderConfig": {
         "ScaleFactor": 0.18215,
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5\\vae_decoder\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5-onnx\\vae_decoder\\model.onnx"
       },
       "VaeEncoderConfig": {
         "ScaleFactor": 0.18215,
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5\\vae_encoder\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5-onnx\\vae_encoder\\model.onnx"
       }
     },
     {

@@ -70,22 +70,22 @@
         "BlankTokenId": 49407,
         "TokenizerLimit": 77,
         "TokenizerLength": 768,
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5\\cliptokenizer.onnx"
+        "OnnxModelPath": "D:\\Repositories\\LCM_Dreamshaper_v7-onnx\\tokenizer\\model.onnx"
       },
       "TextEncoderConfig": {
-        "OnnxModelPath": "D:\\Repositories\\lcm-dreamshaper-v7-f16\\text_encoder\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\LCM_Dreamshaper_v7-onnx\\text_encoder\\model.onnx"
       },
       "UnetConfig": {
         "ModelType": "Base",
-        "OnnxModelPath": "D:\\Repositories\\lcm-dreamshaper-v7-f16\\unet\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\LCM_Dreamshaper_v7-onnx\\unet\\model.onnx"
       },
       "VaeDecoderConfig": {
         "ScaleFactor": 0.18215,
-        "OnnxModelPath": "D:\\Repositories\\lcm-dreamshaper-v7-f16\\vae_decoder\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\LCM_Dreamshaper_v7-onnx\\vae_decoder\\model.onnx"
       },
       "VaeEncoderConfig": {
         "ScaleFactor": 0.18215,
-        "OnnxModelPath": "D:\\Repositories\\lcm-dreamshaper-v7-f16\\vae_encoder\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\LCM_Dreamshaper_v7-onnx\\vae_encoder\\model.onnx"
       }
     },
     {

@@ -108,32 +108,32 @@
         "BlankTokenId": 49407,
         "TokenizerLimit": 77,
         "TokenizerLength": 768,
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5\\cliptokenizer.onnx"
+        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-xl-base-1.0-onnx\\tokenizer\\model.onnx"
       },
       "Tokenizer2Config": {
         "PadTokenId": 1,
         "BlankTokenId": 49407,
         "TokenizerLimit": 77,
         "TokenizerLength": 1280,
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-v1-5\\cliptokenizer.onnx"
+        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-xl-base-1.0-onnx\\tokenizer_2\\model.onnx"
       },
       "TextEncoderConfig": {
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-xl-base-1.0-Olive-Onnx\\text_encoder\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-xl-base-1.0-onnx\\text_encoder\\model.onnx"
       },
       "TextEncoder2Config": {
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-xl-base-1.0-Olive-Onnx\\text_encoder_2\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-xl-base-1.0-onnx\\text_encoder_2\\model.onnx"
       },
       "UnetConfig": {
         "ModelType": "Base",
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-xl-base-1.0-Olive-Onnx\\unet\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-xl-base-1.0-onnx\\unet\\model.onnx"
       },
       "VaeDecoderConfig": {
         "ScaleFactor": 0.13025,
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-xl-base-1.0-Olive-Onnx\\vae_decoder\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-xl-base-1.0-onnx\\vae_decoder\\model.onnx"
       },
       "VaeEncoderConfig": {
         "ScaleFactor": 0.13025,
-        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-xl-base-1.0-Olive-Onnx\\vae_encoder\\model.onnx"
+        "OnnxModelPath": "D:\\Repositories\\stable-diffusion-xl-base-1.0-onnx\\vae_encoder\\model.onnx"
       }
     }
   ]
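
All six model paths per set now point at matching "-onnx" export folders, and the LCM and SDXL sets get their own tokenizer models instead of borrowing the SD 1.5 cliptokenizer. A config like this fails late and confusingly when a path is wrong, so it can help to verify every OnnxModelPath up front. A minimal BCL-only sketch; the file name and property key come from the config above, the class name is illustrative:

// Recursively find every "OnnxModelPath" in appsettings.json and report
// any file that does not exist on disk before the pipelines try to load it.
using System;
using System.IO;
using System.Text.Json;

class OnnxPathCheck
{
    static void Main()
    {
        using var doc = JsonDocument.Parse(File.ReadAllText("appsettings.json"));
        Walk(doc.RootElement);
    }

    static void Walk(JsonElement element)
    {
        if (element.ValueKind == JsonValueKind.Object)
        {
            foreach (var prop in element.EnumerateObject())
            {
                if (prop.Name == "OnnxModelPath" && !File.Exists(prop.Value.GetString()))
                    Console.WriteLine($"Missing model: {prop.Value.GetString()}");
                Walk(prop.Value);
            }
        }
        else if (element.ValueKind == JsonValueKind.Array)
        {
            foreach (var item in element.EnumerateArray())
                Walk(item);
        }
    }
}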

OnnxStack.StableDiffusion/Pipelines/StableCascadePipeline.cs

Lines changed: 1 addition & 16 deletions
@@ -196,26 +196,11 @@ private async Task<PromptEmbeddingsResult> GenerateEmbedsAsync(TokenizerResult i
     }

     var promptTensor = new DenseTensor<float>(promptEmbeddings.ToArray(), new[] { 1, promptEmbeddings.Count / _tokenizer.TokenizerLength, _tokenizer.TokenizerLength });
-    var pooledTensor = new DenseTensor<float>(pooledPromptEmbeddings.ToArray(), new[] { 1, tokenBatches.Count, 1280 });
+    var pooledTensor = new DenseTensor<float>(pooledPromptEmbeddings.ToArray(), new[] { 1, tokenBatches.Count, _tokenizer.TokenizerLength });
     return new PromptEmbeddingsResult(promptTensor, pooledTensor);
 }


-/// <summary>
-/// Pads the input array with blank tokens.
-/// </summary>
-/// <param name="inputs">The inputs.</param>
-/// <param name="requiredLength">Length of the required.</param>
-/// <returns></returns>
-private IEnumerable<long> PadWithBlankTokens(IEnumerable<long> inputs, int requiredLength, int padTokenId)
-{
-    var count = inputs.Count();
-    if (requiredLength > count)
-        return inputs.Concat(Enumerable.Repeat((long)padTokenId, requiredLength - count));
-    return inputs;
-}
-
-
 /// <summary>
 /// Creates the pipeline from a ModelSet configuration.
 /// </summary>
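
The PadWithBlankTokens helper removed here (and again from StableDiffusionXLPipeline below) is still called by the batching code, so it presumably now lives once in a shared base pipeline rather than being duplicated per subclass. A sketch of that assumed shared form, reproducing the removed body; the base-class name and the protected visibility are assumptions:

using System.Collections.Generic;
using System.Linq;

public abstract class PipelineBase // hypothetical home; the actual base class name may differ
{
    /// <summary>
    /// Pads the input sequence with pad tokens up to the required length.
    /// </summary>
    protected IEnumerable<long> PadWithBlankTokens(IEnumerable<long> inputs, int requiredLength, int padTokenId)
    {
        var count = inputs.Count();
        if (requiredLength > count)
            return inputs.Concat(Enumerable.Repeat((long)padTokenId, requiredLength - count));
        return inputs;
    }
}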

OnnxStack.StableDiffusion/Pipelines/StableDiffusionPipeline.cs

Lines changed: 3 additions & 13 deletions
@@ -16,7 +16,6 @@
 using System.Collections.Generic;
 using System.Linq;
 using System.Runtime.CompilerServices;
-using System.Runtime.Intrinsics.X86;
 using System.Threading;
 using System.Threading.Tasks;

@@ -561,8 +560,8 @@ protected async Task<EncoderResult> EncodePromptTokensAsync(TokenizerResult toke
     inferenceParameters.AddOutputBuffer(new int[] { 1, _tokenizer.TokenizerLength });

     var results = await _textEncoder.RunInferenceAsync(inferenceParameters);
-    using (var promptEmbeds = results.Last())
-    using (var promptEmbedsPooled = results.First())
+    using (var promptEmbeds = results.First())
+    using (var promptEmbedsPooled = results.Last())
     {
         return new EncoderResult(promptEmbeds.ToDenseTensor(), promptEmbedsPooled.ToDenseTensor());
     }

@@ -593,7 +592,6 @@ protected async Task<PromptEmbeddingsResult> GeneratePromptEmbedsAsync(Tokenizer
     foreach (var attentionBatch in inputTokens.AttentionMask.Batch(_tokenizer.TokenizerLimit))
         attentionBatches.Add(PadWithBlankTokens(attentionBatch, _tokenizer.TokenizerLimit, 1).ToArray());

-
     var promptEmbeddings = new List<float>();
     var pooledPromptEmbeddings = new List<float>();
     for (int i = 0; i < tokenBatches.Count; i++)

@@ -603,16 +601,8 @@ protected async Task<PromptEmbeddingsResult> GeneratePromptEmbedsAsync(Tokenizer
         pooledPromptEmbeddings.AddRange(result.PooledPromptEmbeds);
     }

-
-    //var embeddingsDim = new[] { 1, promptEmbeddings.Count / _tokenizer2.TokenizerLength, _tokenizer2.TokenizerLength };
-    //var promptTensor = new DenseTensor<float>(promptEmbeddings.ToArray(), embeddingsDim);
-
-    ////TODO: Pooled embeds do not support more than 77 tokens, just grab first set
-    //var pooledDim = new[] { 1, _tokenizer2.TokenizerLength };
-    //var pooledTensor = new DenseTensor<float>(pooledPromptEmbeddings.Take(_tokenizer2.TokenizerLength).ToArray(), pooledDim);
-
     var promptTensor = new DenseTensor<float>(promptEmbeddings.ToArray(), new[] { 1, promptEmbeddings.Count / _tokenizer.TokenizerLength, _tokenizer.TokenizerLength });
-    var pooledTensor = new DenseTensor<float>(pooledPromptEmbeddings.ToArray(), new[] { 1, _tokenizer.TokenizerLimit, _tokenizer.TokenizerLength });
+    var pooledTensor = new DenseTensor<float>(pooledPromptEmbeddings.ToArray(), new[] { 1, tokenBatches.Count, _tokenizer.TokenizerLength });
     return new PromptEmbeddingsResult(promptTensor, pooledTensor);
 }
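
Two fixes land in this file: EncodePromptTokensAsync had the encoder outputs crossed (per the swap, the prompt embeddings are the first result and the pooled embeddings the last), and the pooled tensor's middle dimension becomes tokenBatches.Count, since the encoder produces one pooled vector per 77-token batch rather than one per token. The shape arithmetic, with illustrative numbers:

using System;

class PooledShapeExample
{
    static void Main()
    {
        // Illustrative: a 154-token prompt split into 77-token batches.
        int tokenizerLimit = 77;     // tokens per batch
        int tokenizerLength = 768;   // embedding width
        int tokenBatches = 2;        // batches produced by Batch(tokenizerLimit)

        // Per batch the encoder returns 77 * 768 prompt floats but only
        // one pooled vector of 768 floats.
        int promptFloats = tokenBatches * tokenizerLimit * tokenizerLength; // 118,272
        int pooledFloats = tokenBatches * tokenizerLength;                  //   1,536

        Console.WriteLine($"prompt tensor: {{ 1, {promptFloats / tokenizerLength}, {tokenizerLength} }}"); // { 1, 154, 768 }
        Console.WriteLine($"pooled tensor: {{ 1, {tokenBatches}, {tokenizerLength} }}");                   // { 1, 2, 768 }
        // The old shape { 1, 77, 768 } claimed 77 * 768 = 59,136 floats for a
        // buffer that only holds 2 * 768 = 1,536, hence the fix.
    }
}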

OnnxStack.StableDiffusion/Pipelines/StableDiffusionXLPipeline.cs

Lines changed: 7 additions & 27 deletions
@@ -245,13 +245,17 @@ private async Task<EncoderResult> EncodeTokensAsync(TokenizerResult tokenizedInp
 {
     int hiddenStateIndex = metadata.Outputs.Count - 2;
     inferenceParameters.AddInputTensor(inputTensor);
+
+    // text_embeds + hidden_states.31 ("31" because SDXL always indexes from the penultimate layer.)
     inferenceParameters.AddOutputBuffer(new[] { 1, _tokenizer2.TokenizerLength });
     inferenceParameters.AddOutputBuffer(hiddenStateIndex, new[] { 1, tokenizedInput.InputIds.Length, _tokenizer2.TokenizerLength });

     var results = await _textEncoder2.RunInferenceAsync(inferenceParameters);
-    var promptEmbeds = results.Last().ToDenseTensor();
-    var promptEmbedsPooled = results.First().ToDenseTensor();
-    return new EncoderResult(promptEmbeds, promptEmbedsPooled);
+    using (var promptEmbeds = results.Last())
+    using (var promptEmbedsPooled = results.First())
+    {
+        return new EncoderResult(promptEmbeds.ToDenseTensor(), promptEmbedsPooled.ToDenseTensor());
+    }
 }
 }

@@ -279,7 +283,6 @@ private async Task<PromptEmbeddingsResult> GenerateEmbedsAsync(TokenizerResult i
     foreach (var attentionBatch in inputTokens.AttentionMask.Batch(_tokenizer.TokenizerLimit))
         attentionBatches.Add(PadWithBlankTokens(attentionBatch, _tokenizer.TokenizerLimit, 1).ToArray());

-
     var promptEmbeddings = new List<float>();
     var pooledPromptEmbeddings = new List<float>();
     for (int i = 0; i < tokenBatches.Count; i++)

@@ -288,36 +291,13 @@ private async Task<PromptEmbeddingsResult> GenerateEmbedsAsync(TokenizerResult i
         promptEmbeddings.AddRange(result.PromptEmbeds);
         pooledPromptEmbeddings.AddRange(result.PooledPromptEmbeds);
     }
-
-
-    //var embeddingsDim = new[] { 1, promptEmbeddings.Count / _tokenizer2.TokenizerLength, _tokenizer2.TokenizerLength };
-    //var promptTensor = new DenseTensor<float>(promptEmbeddings.ToArray(), embeddingsDim);
-
-    ////TODO: Pooled embeds do not support more than 77 tokens, just grab first set
-    //var pooledDim = new[] { 1, _tokenizer2.TokenizerLength };
-    //var pooledTensor = new DenseTensor<float>(pooledPromptEmbeddings.Take(_tokenizer2.TokenizerLength).ToArray(), pooledDim);

     var promptTensor = new DenseTensor<float>(promptEmbeddings.ToArray(), new[] { 1, promptEmbeddings.Count / _tokenizer2.TokenizerLength, _tokenizer2.TokenizerLength });
     var pooledTensor = new DenseTensor<float>(pooledPromptEmbeddings.ToArray(), new[] { 1, pooledPromptEmbeddings.Count });
     return new PromptEmbeddingsResult(promptTensor, pooledTensor);
 }


-/// <summary>
-/// Pads the input array with blank tokens.
-/// </summary>
-/// <param name="inputs">The inputs.</param>
-/// <param name="requiredLength">Length of the required.</param>
-/// <returns></returns>
-private IEnumerable<long> PadWithBlankTokens(IEnumerable<long> inputs, int requiredLength, int padTokenId)
-{
-    var count = inputs.Count();
-    if (requiredLength > count)
-        return inputs.Concat(Enumerable.Repeat((long)padTokenId, requiredLength - count));
-    return inputs;
-}
-
-
 /// <summary>
 /// Creates the pipeline from a ModelSet configuration.
 /// </summary>
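
The EncodeTokensAsync change applies the same pattern the base pipeline already uses: inference results wrap native ONNX Runtime buffers, so they are copied into managed DenseTensors and disposed deterministically instead of lingering until finalization. A standalone sketch of that pattern; INativeResult and ToManaged are hypothetical stand-ins for the real result type and ToDenseTensor:

using System;
using System.Collections.Generic;
using System.Linq;

// Hypothetical stand-in for a result that owns a native buffer.
interface INativeResult : IDisposable
{
    float[] ToManaged(); // copies the data out of the native buffer
}

static class EncoderHelper
{
    static (float[] PromptEmbeds, float[] PooledEmbeds) CopyAndRelease(IReadOnlyList<INativeResult> results)
    {
        // Copy before leaving the using scope; once disposed, the native
        // buffer behind each result is freed.
        using (var promptEmbeds = results.Last())        // hidden states
        using (var promptEmbedsPooled = results.First()) // pooled text_embeds
        {
            return (promptEmbeds.ToManaged(), promptEmbedsPooled.ToManaged());
        }
    }
}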
