
Commit 4b26030

Remove obsolete Beam Id

1 parent 03ebec8

8 files changed: +84 additions, -92 deletions

TensorStack.TextGeneration/Pipelines/DecoderPipeline.cs

Lines changed: 1 addition & 11 deletions

@@ -198,10 +198,6 @@ protected virtual async Task<Sequence[]> BeamSearchAsync(O options, Cancellation
                 cancellationToken.ThrowIfCancellationRequested();

                 var beamCandidate = beam.Clone();
-                beamCandidate.Id = beam.Id;
-                if (initialPass)
-                    beamCandidate.Id = beamCandidates.Count;
-
                 beamCandidate.Tokens.Add(sample.TokenId);
                 beamCandidate.Score += sample.Score;
                 beamCandidate.PenaltyScore = GetLengthPenalty(beamCandidate, options.LengthPenalty);
@@ -329,13 +325,7 @@ protected virtual Sequence[] NormalizeAndSort(SequenceCollection sequences, O op
                 .Where(x => x.IsComplete)
                 .OrderByDescending(s => s.PenaltyScore)
                 .ToArray();
-
-            var beam = 0;
-            foreach (var sequence in resultSequences)
-            {
-                sequence.Id = beam++;
-            }
-
+
            sequences.Remove(resultSequences);
            sequences.Clear();
            return resultSequences;
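
With Sequence.Id gone, the beam index is no longer stored on the sequence; callers take it from the sequence's position in the penalty-score-ordered array that BeamSearchAsync returns. A minimal sketch of the consumer pattern the pipeline diffs below all follow (fields reduced for brevity; each pipeline's GenerateResult carries additional result fields):

    // Beam index == position in the sorted result of BeamSearchAsync.
    var sequences = await BeamSearchAsync(options, cancellationToken);
    var results = new GenerateResult[sequences.Length];
    for (int beam = 0; beam < sequences.Length; beam++)
    {
        using var sequence = sequences[beam];
        results[beam] = new GenerateResult
        {
            Beam = beam, // was: sequence.Id
            Score = sequence.Score,
            PenaltyScore = sequence.PenaltyScore
        };
    }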

TensorStack.TextGeneration/Pipelines/Florence/FlorencePipeline.cs

Lines changed: 8 additions & 5 deletions

@@ -19,7 +19,7 @@ namespace TensorStack.TextGeneration.Pipelines.Florence
 {
     public class FlorencePipeline : EncoderDecoderPipeline<FlorenceOptions>,
         IPipeline<GenerateResult, FlorenceOptions>,
-        IPipelineStream<GenerateResult, FlorenceSearchOptions>
+        IPipeline<GenerateResult[], FlorenceSearchOptions>
     {
         private readonly FlorenceConfig _configuration;
         private readonly PreProcessor _preProcessor;
@@ -107,7 +107,7 @@ public virtual async Task<GenerateResult> RunAsync(FlorenceOptions options, IPro
         }


-        public virtual async IAsyncEnumerable<GenerateResult> RunAsync(FlorenceSearchOptions options, IProgress<RunProgress> progressCallback = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+        public virtual async Task<GenerateResult[]> RunAsync(FlorenceSearchOptions options, IProgress<RunProgress> progressCallback = null, CancellationToken cancellationToken = default)
         {
             var textPrompt = _preProcessor.ProcessPrompt(options);
             var imagePrompt = _preProcessor.ProcessImage(options);
@@ -118,21 +118,24 @@ public virtual async IAsyncEnumerable<GenerateResult> RunAsync(FlorenceSearchOpt
             EncoderOutput = await RunEncoderAsync();

             var sequences = await BeamSearchAsync(options, cancellationToken);
-            foreach (var sequence in sequences)
+            var results = new GenerateResult[sequences.Length];
+            for (int beam = 0; beam < sequences.Length; beam++)
             {
+                var sequence = sequences[beam];
                 using (sequence)
                 {
                     var processedBeamOutput = _postProcessor.Process(options, sequence.Tokens);
-                    yield return new GenerateResult
+                    results[beam] = new GenerateResult
                     {
-                        Beam = sequence.Id,
+                        Beam = beam,
                         Score = sequence.Score,
                         PenaltyScore = sequence.PenaltyScore,
                         Result = processedBeamOutput.Result,
                         CoordinateResults = processedBeamOutput.CoordinateResults
                     };
                 }
             }
+            return results;
         }
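
Because the search overload now returns Task<GenerateResult[]> instead of IAsyncEnumerable<GenerateResult>, call sites move from streaming enumeration to a single await. A sketch assuming an already-constructed pipeline and options (variable names are illustrative):

    // Before this commit (streamed per beam):
    // await foreach (var result in pipeline.RunAsync(options))
    //     Console.WriteLine($"{result.Beam}: {result.Result}");

    // After this commit (all beams materialized at once):
    var results = await pipeline.RunAsync(options);
    foreach (var result in results)
        Console.WriteLine($"Beam {result.Beam}: {result.Result}");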

TensorStack.TextGeneration/Pipelines/Other/SummaryPipeline.cs

Lines changed: 4 additions & 4 deletions

@@ -57,14 +57,14 @@ public async Task<GenerateResult[]> RunAsync(SearchOptions options, IProgress<Ru

             var sequences = await BeamSearchAsync(options, cancellationToken);
             var results = new GenerateResult[sequences.Length];
-            for (int i = 0; i < sequences.Length; i++)
+            for (int beam = 0; beam < sequences.Length; beam++)
             {
-                var sequence = sequences[i];
+                var sequence = sequences[beam];
                 using (sequence)
                 {
-                    results[i] = new GenerateResult
+                    results[beam] = new GenerateResult
                     {
-                        Beam = sequence.Id,
+                        Beam = beam,
                         Score = sequence.Score,
                         PenaltyScore = sequence.PenaltyScore,
                         Result = Tokenizer.Decode(sequence.Tokens)

TensorStack.TextGeneration/Pipelines/Phi/Phi3Pipeline.cs

Lines changed: 54 additions & 55 deletions

@@ -64,14 +64,14 @@ public async Task<GenerateResult[]> RunAsync(SearchOptions options, IProgress<Ru

             var sequences = await BeamSearchAsync(options, cancellationToken);
             var results = new GenerateResult[sequences.Length];
-            for (int i = 0; i < sequences.Length; i++)
+            for (int beam = 0; beam < sequences.Length; beam++)
             {
-                var sequence = sequences[i];
+                var sequence = sequences[beam];
                 using (sequence)
                 {
-                    results[i] = new GenerateResult
+                    results[beam] = new GenerateResult
                     {
-                        Beam = sequence.Id,
+                        Beam = beam,
                         Score = sequence.Score,
                         PenaltyScore = sequence.PenaltyScore,
                         Result = Tokenizer.Decode(sequence.Tokens)
@@ -82,6 +82,28 @@ public async Task<GenerateResult[]> RunAsync(SearchOptions options, IProgress<Ru
         }


+        /// <summary>
+        /// Gets the token processors.
+        /// </summary>
+        /// <param name="options">The options.</param>
+        /// <returns>ITokenProcessor[].</returns>
+        protected override ITokenProcessor[] GetTokenProcessors(GenerateOptions options)
+        {
+            return
+            [
+                new EOSTokenProcessor
+                (
+                    options.MinLength, // min length
+                    Tokenizer.EOS,
+                    32000, // <|endoftext|>
+                    32001, // <|assistant|>
+                    32007  // <|end|>
+                ),
+                new MaxLengthTokenProcessor(options.MaxLength)
+            ];
+        }
+
+
         /// <summary>
         /// Initialize the Decoder cache
         /// </summary>
@@ -91,32 +113,15 @@ protected override async Task<Sequence> InitializeAsync(GenerateOptions options)
         {
             var modelMetadata = await Decoder.LoadAsync();
             var dataType = modelMetadata.Outputs[0].Value.ElementDataType;
-            var kvCache = new KVCacheDecoder(dataType, DecoderConfig.NumHeads, DecoderConfig.NumLayers, DecoderConfig.HiddenSize, DecoderConfig.NumKVHeads);
+            var kvCache = new KVCacheDecoder(dataType, DecoderConfig.NumHeads, DecoderConfig.NumLayers, DecoderConfig.HiddenSize, DecoderConfig.NumKVHeads, options.MaxLength);
             var sequence = new Sequence(kvCache, Tokenizer.BOS);
             sequence.Initialize(TokenizerOutput.Length);

-            var positionIds = GetPositionIds(modelMetadata, 0, TokenizerOutput.Length);
-            var attentionMask = new Tensor<long>([1, TokenizerOutput.Length], 1);
-            using (var parameters = new ModelParameters(modelMetadata))
-            {
-                // Inputs
-                parameters.AddInput(TokenizerOutput.InputIds);
-                if (positionIds != null)
-                    parameters.AddInput(positionIds);
-                parameters.AddInput(attentionMask);
-                foreach (var pastKeyValue in sequence.Cache)
-                    parameters.AddInput(pastKeyValue);
-
-                // Outputs
-                foreach (var output in modelMetadata.Outputs)
-                    parameters.AddOutput();
-
-                // Result
-                var modelResult = Decoder.RunInference(parameters);
-                modelResult[0].Dispose(); // logits
-                var presentKeyValues = modelResult.ToArray()[1..];
-                sequence.UpdateCache(presentKeyValues, false);
-            }
+            var position = TokenizerOutput.Length;
+            var inputIds = TokenizerOutput.InputIds;
+            var positionIds = GetPositionIds(modelMetadata, 0, position);
+            var attentionMask = new Tensor<long>([1, position], 1);
+            RunDecoderInternalAsync(modelMetadata, sequence, inputIds, positionIds, attentionMask, false);
             return sequence;
         }

@@ -128,11 +133,26 @@ protected override async Task<Sequence> InitializeAsync(GenerateOptions options)
         /// <returns>A Task&lt;Tensor`1&gt; representing the asynchronous operation.</returns>
         protected override async Task<Tensor<float>> RunDecoderAsync(Sequence sequence)
         {
-            var currentPosition = TokenizerOutput.Length + sequence.Tokens.Count;
             var modelMetadata = await Decoder.LoadAsync();
+            var position = TokenizerOutput.Length + sequence.Tokens.Count;
             var inputIds = new Tensor<long>([1, 1], sequence.Tokens[^1]);
-            var positionIds = GetPositionIds(modelMetadata, currentPosition);
-            var attentionMask = new Tensor<long>([1, currentPosition], 1);
+            var positionIds = GetPositionIds(modelMetadata, position);
+            var attentionMask = new Tensor<long>([1, position], 1);
+            return RunDecoderInternalAsync(modelMetadata, sequence, inputIds, positionIds, attentionMask, true);
+        }
+
+
+        /// <summary>
+        /// Runs the decoder
+        /// </summary>
+        /// <param name="modelMetadata">The model metadata.</param>
+        /// <param name="sequence">The sequence.</param>
+        /// <param name="inputIds">The input ids.</param>
+        /// <param name="positionIds">The position ids.</param>
+        /// <param name="attentionMask">The attention mask.</param>
+        /// <param name="useBranchCache">if set to <c>true</c> [use branch cache].</param>
+        private Tensor<float> RunDecoderInternalAsync(ModelMetadata modelMetadata, Sequence sequence, Tensor<long> inputIds, Tensor<long> positionIds, Tensor<long> attentionMask, bool useBranchCache)
+        {
             using (var parameters = new ModelParameters(modelMetadata))
             {
                 // Inputs
@@ -151,38 +171,17 @@ protected override async Task<Tensor<float>> RunDecoderAsync(Sequence sequence)
                 var modelResult = Decoder.RunInference(parameters);
                 using (var logitsResult = modelResult[0])
                 {
-                    var logits = logitsResult.ToTensor();
+                    var dimension = logitsResult.GetDimensions();
+                    var logits = logitsResult.ToTensor(dimension[1..]);
                     var presentKeyValues = modelResult.ToArray()[1..];

-                    sequence.UpdateCache(presentKeyValues, false);
-                    return logits.Reshape([logits.Dimensions[0], logits.Dimensions[2]]);
+                    sequence.UpdateCache(presentKeyValues, useBranchCache);
+                    return logits;
                 }
             }
         }


-        /// <summary>
-        /// Gets the token processors.
-        /// </summary>
-        /// <param name="options">The options.</param>
-        /// <returns>ITokenProcessor[].</returns>
-        protected override ITokenProcessor[] GetTokenProcessors(GenerateOptions options)
-        {
-            return
-            [
-                new EOSTokenProcessor
-                (
-                    options.MinLength, // min length
-                    Tokenizer.EOS,
-                    32000, // <|endoftext|>
-                    32001 // <|assistant|>
-                    // 32007 // <|end|>
-                ),
-                new MaxLengthTokenProcessor(options.MaxLength)
-            ];
-        }
-
-
         /// <summary>
         /// Creates the Phi3Pipeline
         /// </summary>
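
Two behavioral notes fall out of this diff: GetTokenProcessors now treats <|end|> (32007) as a stop token rather than leaving it commented out, and both decoder paths share RunDecoderInternalAsync, with useBranchCache false on the initial prompt pass and true during beam-search generation. A hypothetical sketch of the stop-token effect (EOSTokenProcessor internals are not part of this diff; the method name and shape here are assumptions):

    using System.Linq;

    // Hypothetical: after this commit, any of these Phi-3 special tokens ends
    // generation once MinLength has been reached; previously 32007 was ignored.
    static bool IsStopToken(long tokenId, long eosTokenId, int generatedLength, int minLength)
    {
        long[] stopTokens = [eosTokenId, 32000 /* <|endoftext|> */, 32001 /* <|assistant|> */, 32007 /* <|end|> */];
        return generatedLength >= minLength && stopTokens.Contains(tokenId);
    }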

TensorStack.TextGeneration/Pipelines/Whisper/WhisperPipeline.cs

Lines changed: 4 additions & 4 deletions

@@ -87,12 +87,12 @@ public async Task<GenerateResult[]> RunAsync(WhisperSearchOptions options, IProg
             {
                 await RunEncoderAsync(sample);
                 var sequences = await BeamSearchAsync(options, cancellationToken);
-                for (int i = 0; i < sequences.Length; i++)
+                for (int beam = 0; beam < sequences.Length; beam++)
                 {
-                    var sequence = sequences[i];
+                    var sequence = sequences[beam];
                     using (sequence)
                     {
-                        var existing = results.ElementAtOrDefault(i);
+                        var existing = results.ElementAtOrDefault(beam);
                         if (existing != null)
                         {
                             existing.Score += sequence.Score;
@@ -103,7 +103,7 @@ public async Task<GenerateResult[]> RunAsync(WhisperSearchOptions options, IProg
                         {
                             results.Add(new GenerateResult
                             {
-                                Beam = sequence.Id,
+                                Beam = beam,
                                 Score = sequence.Score,
                                 PenaltyScore = sequence.PenaltyScore,
                                 Result = Tokenizer.Decode(sequence.Tokens)

TensorStack.TextGeneration/Processing/KVCacheDecoder.cs

Lines changed: 6 additions & 4 deletions

@@ -13,6 +13,7 @@ public sealed class KVCacheDecoder : IKVCache
     private readonly int _numLayers;
     private readonly int _hiddenSize;
     private readonly int _numKVHeads;
+    private readonly int _maxLength;
     private OrtValue[] _values;


@@ -23,13 +24,14 @@ public sealed class KVCacheDecoder : IKVCache
     /// <param name="numHeads">The number heads.</param>
     /// <param name="numLayers">The number layers.</param>
     /// <param name="hiddenSize">Size of the hidden.</param>
-    public KVCacheDecoder(OrtType dataType, int numHeads, int numLayers, int hiddenSize, int numKVHeads)
+    public KVCacheDecoder(OrtType dataType, int numHeads, int numLayers, int hiddenSize, int numKVHeads, int maxLength)
     {
        _dataType = dataType;
        _numHeads = numHeads;
        _numLayers = numLayers;
        _hiddenSize = hiddenSize;
        _numKVHeads = numKVHeads;
+       _maxLength = maxLength;
     }


@@ -41,8 +43,8 @@ public KVCacheDecoder(OrtType dataType, int numHeads, int numLayers, int hiddenS
     /// <param name="numLayers">The number layers.</param>
     /// <param name="hiddenSize">Size of the hidden.</param>
     /// <param name="values">The cache values.</param>
-    private KVCacheDecoder(OrtType dataType, int numHeads, int numLayers, int hiddenSize, int numKVHeads, OrtValue[] values)
-        : this(dataType, numHeads, numLayers, hiddenSize, numKVHeads)
+    private KVCacheDecoder(OrtType dataType, int numHeads, int numLayers, int hiddenSize, int numKVHeads, int maxLength, OrtValue[] values)
+        : this(dataType, numHeads, numLayers, hiddenSize, numKVHeads, maxLength)
     {
        _values = values;
     }
@@ -109,7 +111,7 @@ public IKVCache Clone()
        for (int i = 0; i < _values.Length; i++)
            cacheValues[i] = _values[i].Clone();

-       return new KVCacheDecoder(_dataType, _numHeads, _numLayers, _hiddenSize, _numKVHeads, cacheValues);
+       return new KVCacheDecoder(_dataType, _numHeads, _numLayers, _hiddenSize, _numKVHeads, _maxLength, cacheValues);
     }
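
The new maxLength value is stored but not yet read anywhere in this commit; it is threaded in from the generation options at construction time, as the Phi3Pipeline diff above shows:

    // From the Phi3Pipeline diff in this commit: the generation MaxLength
    // now reaches the decoder KV cache at construction.
    var kvCache = new KVCacheDecoder(
        dataType,
        DecoderConfig.NumHeads,
        DecoderConfig.NumLayers,
        DecoderConfig.HiddenSize,
        DecoderConfig.NumKVHeads,
        options.MaxLength);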

TensorStack.TextGeneration/Processing/KVCacheEncoderDecoder.cs

Lines changed: 5 additions & 2 deletions

@@ -12,6 +12,8 @@ public sealed class KVCacheEncoderDecoder : IKVCache
     private readonly int _numHeads;
     private readonly int _numLayers;
     private readonly int _hiddenSize;
+    private readonly int _headDimension;
+
     private OrtValue[] _values;

     /// <summary>
@@ -24,6 +26,7 @@ public KVCacheEncoderDecoder(OrtType dataType, int numHeads, int numLayers, int
        _numHeads = numHeads;
        _numLayers = numLayers;
        _hiddenSize = hiddenSize;
+       _headDimension = _hiddenSize / _numHeads;
     }


@@ -58,8 +61,8 @@ public void Initialize(int initialSize)
     {
        _values = new OrtValue[_numLayers * 4];
        var allocator = OrtAllocator.DefaultInstance;
-       var decoderDims = new[] { 1L, _numHeads, 1, (_hiddenSize / _numHeads) };
-       var encoderDims = new[] { 1L, _numHeads, initialSize, (_hiddenSize / _numHeads) };
+       var decoderDims = new[] { 1L, _numHeads, 1, _headDimension };
+       var encoderDims = new[] { 1L, _numHeads, initialSize, _headDimension };
        for (var i = 0; i < _values.Length; ++i)
        {
            if (i % 4 == 0)
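
This refactor only hoists the repeated _hiddenSize / _numHeads division into a precomputed _headDimension field; the tensor shapes are unchanged. For example, with an assumed hiddenSize of 768 and numHeads of 12 (values for illustration only), _headDimension is 64 and each encoder cache entry is shaped [1, 12, initialSize, 64].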

TensorStack.TextGeneration/Processing/Sequence.cs

Lines changed: 2 additions & 7 deletions

@@ -15,10 +15,10 @@ public sealed class Sequence : IDisposable
     /// </summary>
     /// <param name="cache">The cache.</param>
     /// <param name="bos">The bos.</param>
-    public Sequence(IKVCache cache, long bos)
+    public Sequence(IKVCache cache, params List<long> startSequence)
     {
-       Tokens = [bos];
        _cache = cache;
+       Tokens = startSequence;
     }

     /// <summary>
@@ -34,11 +34,6 @@ private Sequence(List<long> tokens, float score, IKVCache cache)
        _cache = cache;
     }

-    /// <summary>
-    /// Gets or sets the identifier.
-    /// </summary>
-    public int Id { get; set; }
-
     /// <summary>
     /// Gets the tokens.
     /// </summary>
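
params List<long> (the C# 13 params-collections feature) keeps existing single-token call sites compiling while allowing multi-token seeds. A sketch assuming an IKVCache cache instance (token ids are illustrative):

    // Single-token call sites are unchanged: the BOS token binds to the
    // params collection, as in Phi3Pipeline: new Sequence(kvCache, Tokenizer.BOS).
    var fromBos = new Sequence(cache, Tokenizer.BOS);

    // New: a sequence can now be seeded with several start tokens at once.
    var fromPrompt = new Sequence(cache, 1, 4093, 287);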
