Skip to content

Commit 7c49483

Browse files
committed
Merge remote-tracking branch 'upstream/master' into mtmd_implementation
2 parents d5aab12 + a5d5b6d commit 7c49483

19 files changed

+171
-60
lines changed

.github/workflows/compile.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,8 @@ jobs:
465465
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DGGML_AVX=ON -DGGML_AVX2=ON'
466466
- build: 'x64-rosetta2'
467467
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF'
468+
env:
469+
MACOS_RPATH_DEFINE: "-DCMAKE_INSTALL_RPATH='@loader_path' -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"
468470
runs-on: macos-latest
469471
steps:
470472
- uses: actions/checkout@v4
@@ -481,7 +483,7 @@ jobs:
481483
run: |
482484
mkdir build
483485
cd build
484-
cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
486+
cmake .. ${{ env.COMMON_DEFINE }} ${{ env.MACOS_RPATH_DEFINE }} ${{ matrix.defines }}
485487
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
486488
ls -R
487489
- name: Upload ggml
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
using System.Linq;
2+
using BenchmarkDotNet.Attributes;
3+
using BenchmarkDotNet.Engines;
4+
using BenchmarkDotNet.Jobs;
5+
using LLama.Common;
6+
7+
namespace LLama.Benchmark.Collections;
8+
9+
/// <summary>
/// Benchmarks for <see cref="FixedSizeQueue{T}"/>: enqueueing past capacity and
/// enumerating a queue that has already been over-filled.
/// </summary>
[SimpleJob(RunStrategy.Throughput, RuntimeMoniker.Net80)]
[MemoryDiagnoser]
[BenchmarkCategory("Collections", "FixedSizeQueue")]
public class FixedSizeQueueBenchmark
{
    // How many values are enqueued per slot of capacity. Anything above 1 forces
    // the queue to overwrite its oldest entries while it is being filled.
    private const int OverfillFactor = 4;

    /// <summary>
    /// Capacity of the queue under test.
    /// </summary>
    [Params(32, 512, 4096)]
    public int Capacity { get; set; }

    // Input values, rebuilt in Setup for the current Capacity.
    private int[] _values = Array.Empty<int>();

    // Queue that has already been over-filled; assigned in Setup so that
    // IterateTailSum measures enumeration only, not construction and enqueueing.
    private FixedSizeQueue<int> _filledQueue = null!;

    /// <summary>
    /// Prepares the input values and the pre-filled queue for the current <see cref="Capacity"/>.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _values = Enumerable.Range(0, Capacity * OverfillFactor).ToArray();
        _filledQueue = CreateFilledQueue();
    }

    /// <summary>
    /// Measures creating a queue and enqueueing <c>Capacity * 4</c> values into it.
    /// </summary>
    /// <returns>The final item count, returned so the work cannot be optimized away.</returns>
    [Benchmark]
    public int EnqueueWrap()
    {
        return CreateFilledQueue().Count;
    }

    /// <summary>
    /// Measures enumerating the items retained by an already over-filled queue.
    /// </summary>
    /// <returns>The sum of the enumerated items, returned so the loop cannot be optimized away.</returns>
    [Benchmark]
    public int IterateTailSum()
    {
        var sum = 0;
        foreach (var value in _filledQueue)
        {
            sum += value;
        }

        return sum;
    }

    // Builds a new queue of the configured capacity and enqueues every prepared value into it.
    private FixedSizeQueue<int> CreateFilledQueue()
    {
        var queue = new FixedSizeQueue<int>(Capacity);
        foreach (var value in _values)
        {
            queue.Enqueue(value);
        }

        return queue;
    }
}

LLama.Benchmark/LLama.Benchmark.csproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
</PropertyGroup>
1111

1212
<ItemGroup>
13-
<PackageReference Include="BenchmarkDotNet" Version="0.15.2" />
14-
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.15.2" />
13+
<PackageReference Include="BenchmarkDotNet" Version="0.15.4" />
14+
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.15.4" />
1515
</ItemGroup>
1616

1717
<ItemGroup>

LLama.Examples/LLama.Examples.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
<ItemGroup>
1717
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.9" />
1818
<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.98.250508.3" />
19-
<PackageReference Include="Microsoft.SemanticKernel" Version="1.64.0" />
19+
<PackageReference Include="Microsoft.SemanticKernel" Version="1.65.0" />
2020
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.44.0-alpha" />
2121
<PackageReference Include="NAudio" Version="2.2.1" />
2222
<PackageReference Include="SixLabors.ImageSharp" Version="3.1.11" />

LLama.Web/LLama.Web.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
</ItemGroup>
1616

1717
<ItemGroup>
18-
<PackageReference Include="Microsoft.AspNetCore.Mvc.Razor.RuntimeCompilation" Version="8.0.19" />
18+
<PackageReference Include="Microsoft.AspNetCore.Mvc.Razor.RuntimeCompilation" Version="8.0.20" />
1919
<PackageReference Include="System.Linq.Async" Version="6.0.3" />
2020
</ItemGroup>
2121

LLama.WebAPI/LLama.WebAPI.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
<ItemGroup>
1111
<PackageReference Include="Microsoft.VisualStudio.Validation" Version="17.8.8" />
12-
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.19" />
12+
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.20" />
1313
<PackageReference Include="Swashbuckle.AspNetCore" Version="7.3.1" />
1414
</ItemGroup>
1515

LLama/Abstractions/IInferenceParams.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ public interface IInferenceParams
1414
public int TokensKeep { get; set; }
1515

1616
/// <summary>
17-
/// how many new tokens to predict (n_predict), set to -1 to inifinitely generate response
17+
/// how many new tokens to predict (n_predict), set to -1 to infinitely generate response
1818
/// until it completes.
1919
/// </summary>
2020
public int MaxTokens { get; set; }

LLama/AntipromptProcessor.cs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ public sealed class AntipromptProcessor
1111
private int _longestAntiprompt;
1212
private readonly List<string> _antiprompts = new();
1313

14-
private string? _string;
14+
private string _buffer = string.Empty;
1515

1616

1717
/// <summary>
@@ -46,6 +46,8 @@ public void SetAntiprompts(IEnumerable<string> antiprompts)
4646
_longestAntiprompt = 0;
4747
foreach (var antiprompt in _antiprompts)
4848
_longestAntiprompt = Math.Max(_longestAntiprompt, antiprompt.Length);
49+
50+
_buffer = string.Empty;
4951
}
5052

5153
/// <summary>
@@ -55,21 +57,21 @@ public void SetAntiprompts(IEnumerable<string> antiprompts)
5557
/// <returns>true if the text buffer ends with any antiprompt</returns>
5658
public bool Add(string text)
5759
{
58-
_string += text;
60+
_buffer += text;
5961

6062
// When the string gets very long (4x antiprompt length) trim it down (to 2x antiprompt length).
6163
// This trimming leaves a lot of extra characters because two sequences can be considered "equal" in unicode
6264
// even with different numbers of characters. Hopefully there are enough characters here to handle all those weird circumstances!
6365
var maxLength = Math.Max(32, _longestAntiprompt * 4);
6466
var trimLength = Math.Max(16, _longestAntiprompt * 2);
65-
if (_string.Length > maxLength)
66-
_string = _string.Substring(_string.Length - trimLength);
67+
if (_buffer.Length > maxLength)
68+
_buffer = _buffer.Substring(_buffer.Length - trimLength);
6769

6870
foreach (var antiprompt in _antiprompts)
69-
if (_string.EndsWith(antiprompt, StringComparison.CurrentCulture))
71+
if (_buffer.EndsWith(antiprompt, StringComparison.CurrentCulture))
7072
return true;
7173

7274
return false;
7375
}
7476
}
75-
}
77+
}

LLama/Common/FixedSizeQueue.cs

Lines changed: 71 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,75 +6,117 @@
66
namespace LLama.Common
77
{
88
/// <summary>
9-
/// A queue with fixed storage size.
10-
/// Currently it's only a naive implementation and needs to be further optimized in the future.
9+
/// A queue with fixed storage size backed by a circular buffer.
1110
/// </summary>
1211
public class FixedSizeQueue<T>
1312
: IReadOnlyList<T>
1413
{
15-
private readonly List<T> _storage;
14+
private readonly T[] _buffer;
15+
private int _start;
16+
private int _count;
17+
private T[]? _window;
18+
19+
// Minimum capacity for the temporary buffer used to expose a contiguous view.
20+
private const int MinimumWindowSize = 4;
21+
// Resize multiplier for the temporary buffer to reduce copy churn as it grows.
22+
private const int WindowGrowthFactor = 2;
1623

1724
/// <inheritdoc />
18-
public T this[int index] => _storage[index];
25+
public T this[int index]
26+
{
27+
get
28+
{
29+
if ((uint)index >= (uint)_count)
30+
throw new ArgumentOutOfRangeException(nameof(index));
31+
32+
var actualIndex = (_start + index) % Capacity;
33+
return _buffer[actualIndex];
34+
}
35+
}
1936

2037
/// <summary>
2138
/// Number of items in this queue
2239
/// </summary>
23-
public int Count => _storage.Count;
40+
public int Count => _count;
2441

2542
/// <summary>
2643
/// Maximum number of items allowed in this queue
2744
/// </summary>
2845
public int Capacity { get; }
2946

3047
/// <summary>
31-
/// Create a new queue
48+
/// Create a new queue.
3249
/// </summary>
33-
/// <param name="size">the maximum number of items to store in this queue</param>
50+
/// <param name="size">The maximum number of items to store in this queue.</param>
3451
public FixedSizeQueue(int size)
3552
{
53+
if (size <= 0)
54+
throw new ArgumentOutOfRangeException(nameof(size), size, "Capacity must be greater than zero.");
55+
3656
Capacity = size;
37-
_storage = new();
57+
_buffer = new T[size];
58+
_start = 0;
59+
_count = 0;
3860
}
3961

4062
/// <summary>
41-
/// Fill the quene with the data. Please ensure that data.Count &lt;= size
63+
/// Fill the queue with existing data. Please ensure that data.Count &lt;= size
4264
/// </summary>
4365
/// <param name="size"></param>
4466
/// <param name="data"></param>
4567
public FixedSizeQueue(int size, IEnumerable<T> data)
68+
: this(size)
4669
{
4770
#if NET6_0_OR_GREATER
48-
// Try to check the size without enumerating the entire IEnumerable. This may not be able to get the count,
49-
// in which case we'll have to check later
5071
if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)
51-
throw new ArgumentException($"The max size set for the quene is {size}, but got {dataCount} initial values.");
72+
throw new ArgumentException($"The max size set for the queue is {size}, but got {dataCount} initial values.");
5273
#endif
5374

54-
// Size of "data" is unknown, copy it all into a list
55-
Capacity = size;
56-
_storage = new List<T>(data);
75+
if (data is ICollection<T> collection)
76+
{
77+
if (collection.Count > size)
78+
throw new ArgumentException($"The max size set for the queue is {size}, but got {collection.Count} initial values.");
79+
80+
foreach (var item in collection)
81+
Enqueue(item);
82+
return;
83+
}
5784

58-
// Now check if that list is a valid size.
59-
if (_storage.Count > Capacity)
60-
throw new ArgumentException($"The max size set for the quene is {size}, but got {_storage.Count} initial values.");
85+
var index = 0;
86+
foreach (var item in data)
87+
{
88+
if (index >= size)
89+
throw new ArgumentException($"The max size set for the queue is {size}, but got {index + 1} initial values.");
90+
91+
Enqueue(item);
92+
index++;
93+
}
6194
}
6295

6396
/// <summary>
64-
/// Enquene an element.
97+
/// Enqueue an element. When the queue is full the oldest element is overwritten.
6598
/// </summary>
66-
/// <returns></returns>
6799
public void Enqueue(T item)
68100
{
69-
_storage.Add(item);
70-
if (_storage.Count > Capacity)
71-
_storage.RemoveAt(0);
101+
if (_count < Capacity)
102+
{
103+
var tail = (_start + _count) % Capacity;
104+
_buffer[tail] = item;
105+
_count++;
106+
}
107+
else
108+
{
109+
_buffer[_start] = item;
110+
_start++;
111+
if (_start == Capacity)
112+
_start = 0;
113+
}
72114
}
73115

74116
/// <inheritdoc />
75117
public IEnumerator<T> GetEnumerator()
76118
{
77-
return _storage.GetEnumerator();
119+
return Enumerate().GetEnumerator();
78120
}
79121

80122
/// <inheritdoc />
@@ -83,17 +125,12 @@ IEnumerator IEnumerable.GetEnumerator()
83125
return GetEnumerator();
84126
}
85127

86-
internal ReadOnlySpan<T> AsSpan(int count)
128+
private IEnumerable<T> Enumerate()
87129
{
88-
// Ensure the request isn't for more tokens than actually exist
89-
count = Math.Min(count, Count);
90-
91-
// Take `count` items from the end
92-
#if NET8_0_OR_GREATER
93-
return CollectionsMarshal.AsSpan(_storage)[^count..];
94-
#else
95-
return _storage.ToArray().AsSpan(_storage.Count - count, count);
96-
#endif
130+
for (var i = 0; i < _count; i++)
131+
{
132+
yield return this[i];
133+
}
97134
}
98135
}
99136
}

LLama/Common/InferenceParams.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public record InferenceParams
1818
public int TokensKeep { get; set; } = 0;
1919

2020
/// <summary>
21-
/// how many new tokens to predict (n_predict), set to -1 to inifinitely generate response
21+
/// how many new tokens to predict (n_predict), set to -1 to infinitely generate response
2222
/// until it completes.
2323
/// </summary>
2424
public int MaxTokens { get; set; } = -1;

0 commit comments

Comments
 (0)