Skip to content

Commit d7935d2

Browse files
committed
test
1 parent 2c52ed2 commit d7935d2

File tree

3 files changed

+376
-0
lines changed

3 files changed

+376
-0
lines changed
Lines changed: 353 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,353 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Diagnostics.CodeAnalysis;
4+
using System.Linq;
5+
using System.Runtime.CompilerServices;
6+
using System.Text;
7+
using System.Threading.Tasks;
8+
9+
10+
#if NET6_0_OR_GREATER
11+
using System.Runtime.Intrinsics;
12+
using System.Runtime.Intrinsics.X86;
13+
#endif
14+
15+
namespace BitFaster.Caching.Benchmarks.Lfu
16+
{
17+
public unsafe class CmSketchPinNoOpt<T, I>
18+
where T : notnull
19+
where I : struct, IsaProbe
20+
{
21+
private const long ResetMask = 0x7777777777777777L;
22+
private const long OneMask = 0x1111111111111111L;
23+
24+
private long[] table;
25+
#if NET6_0_OR_GREATER
26+
private long* tableAddr;
27+
#endif
28+
private int sampleSize;
29+
private int blockMask;
30+
private int size;
31+
32+
private readonly IEqualityComparer<T> comparer;
33+
34+
/// <summary>
/// Initializes a new instance of the CmSketchPinNoOpt class with the specified maximum size and equality comparer.
/// </summary>
/// <param name="maximumSize">The maximum size, used to dimension the counter table.</param>
/// <param name="comparer">The equality comparer whose GetHashCode is used to hash values.</param>
public CmSketchPinNoOpt(long maximumSize, IEqualityComparer<T> comparer)
{
    // The two initialization steps are independent: sizing the table never touches the comparer.
    this.comparer = comparer;
    this.EnsureCapacity(maximumSize);
}
44+
45+
/// <summary>
/// Gets the reset sample size: the value of the size counter at which the sketch resets (halves) its counters.
/// </summary>
public int ResetSampleSize
{
    get { return this.sampleSize; }
}
49+
50+
/// <summary>
/// Gets the size: the running count of recorded increments since the last clear, as adjusted by resets.
/// </summary>
public int Size
{
    get { return this.size; }
}
54+
55+
/// <summary>
/// Estimate the frequency of the specified value, up to the maximum of 15.
/// </summary>
/// <param name="value">The value.</param>
/// <returns>The estimated frequency of the value.</returns>
public int EstimateFrequency(T value)
{
    // The AVX2 implementation (and the tableAddr field it reads) only exists under
    // NET6_0_OR_GREATER, so the dispatch is guarded by the same symbol. Every other
    // target framework falls through to the scalar implementation.
#if NET6_0_OR_GREATER
    I isa = default;

    if (isa.IsAvx2Supported)
    {
        return EstimateFrequencyAvx(value);
    }
#endif

    return EstimateFrequencyStd(value);
}
78+
79+
/// <summary>
/// Increment the count of the specified value.
/// </summary>
/// <param name="value">The value.</param>
public void Increment(T value)
{
    // The AVX2 implementation (and the tableAddr field it reads) only exists under
    // NET6_0_OR_GREATER, so the dispatch is guarded by the same symbol. Every other
    // target framework falls through to the scalar implementation.
#if NET6_0_OR_GREATER
    I isa = default;

    if (isa.IsAvx2Supported)
    {
        IncrementAvx(value);
        return;
    }
#endif

    IncrementStd(value);
}
101+
102+
/// <summary>
/// Clears the count for all items: zeroes every element of the table and sets the size counter back to zero.
/// </summary>
public void Clear()
{
    this.size = 0;
    Array.Clear(this.table, 0, this.table.Length);
}
110+
111+
//[MemberNotNull(nameof(table))]
112+
/// <summary>
/// Allocates the counter table for the requested maximum size and initializes the block mask,
/// the reset sample size and the size counter.
/// </summary>
/// <param name="maximumSize">The maximum size; values above int.MaxValue / 2 are capped.</param>
private void EnsureCapacity(long maximumSize)
{
    int maximum = (int)Math.Min(maximumSize, int.MaxValue >> 1);

    // The table holds a power-of-two number of longs, and never fewer than 8 (one block).
    var length = Math.Max(BitOps.CeilingPowerOfTwo(maximum), 8);

#if NET6_0_OR_GREATER
    I isa = default;
    if (isa.IsAvx2Supported)
    {
        // Over-allocate by 8 longs (64 bytes) so that a 64-byte aligned start address
        // always exists inside the pinned array with 'length' longs available after it.
        const int pad = 8;
        table = GC.AllocateArray<long>(length + pad, pinned: true);

        // Round the address of the first element up to the next multiple of 64.
        // The displacement is at most 63 bytes, which the 64 byte pad always covers.
        long baseAddr = (long)Unsafe.AsPointer(ref table[0]);
        tableAddr = (long*)((baseAddr + 63L) & ~63L);

        // The padding is not addressable by the block mask.
        blockMask = (int)((uint)(table.Length - pad) >> 3) - 1;
    }
    else
#endif
    {
        table = new long[length];
        blockMask = (int)((uint)table.Length >> 3) - 1;
    }

    // Compute 10 * maximum in 64 bits and clamp: maximum can be as large as int.MaxValue / 2,
    // so the 32 bit product would overflow and yield a negative or arbitrary sample size.
    sampleSize = (maximumSize == 0) ? 10 : (int)Math.Min(10L * maximum, int.MaxValue);

    size = 0;
}
141+
142+
/// <summary>
/// Scalar frequency estimate: reads the four 4-bit counters selected by the value's hash
/// and returns the smallest of them.
/// </summary>
/// <param name="value">The value.</param>
/// <returns>The minimum of the four counters, in the range 0 to 15.</returns>
private unsafe int EstimateFrequencyStd(T value)
{
    int blockHash = Spread(comparer.GetHashCode(value));
    int counterHash = Rehash(blockHash);

    // Each block is 8 consecutive longs; the block hash selects which block to read.
    int blockStart = (blockHash & blockMask) << 3;

    int minimum = int.MaxValue;

    for (int lane = 0; lane < 4; lane++)
    {
        // Each lane is driven by a different byte of the counter hash.
        int laneHash = (int)((uint)counterHash >> (lane * 8));

        // Bits 1..4 choose one of the 16 counters in the long, bit 0 chooses which long of the pair.
        int shift = ((laneHash >> 1) & 15) * 4;
        long word = table[blockStart + (laneHash & 1) + (lane * 2)];

        int counter = (int)(((ulong)word >> shift) & 0xfUL);
        minimum = Math.Min(minimum, counter);
    }

    return minimum;
}
158+
159+
/// <summary>
/// Scalar increment: bumps the four 4-bit counters selected by the value's hash and,
/// when at least one of them changed, advances the size counter and resets the sketch
/// once the sample size is reached.
/// </summary>
/// <param name="value">The value.</param>
private unsafe void IncrementStd(T value)
{
    int blockHash = Spread(comparer.GetHashCode(value));
    int counterHash = Rehash(blockHash);

    // Each block is 8 consecutive longs; the block hash selects which block to update.
    int blockStart = (blockHash & blockMask) << 3;

    bool anyIncremented = false;

    for (int lane = 0; lane < 4; lane++)
    {
        // Each lane is driven by a different byte of the counter hash.
        int laneHash = (int)((uint)counterHash >> (lane * 8));
        int counterIndex = (laneHash >> 1) & 15;
        int tableIndex = blockStart + (laneHash & 1) + (lane * 2);

        // '|=' on bool does not short-circuit, so all four counters are always attempted.
        anyIncremented |= IncrementAt(tableIndex, counterIndex);
    }

    if (anyIncremented)
    {
        size++;

        if (size == sampleSize)
        {
            Reset();
        }
    }
}
185+
186+
// Applies another round of hashing for additional randomization:
// one multiply by a fixed odd constant followed by an xor-shift of the top bits.
private static int Rehash(int x)
{
    int mixed = x * 0x31848bab;
    return mixed ^ (int)((uint)mixed >> 14);
}
193+
194+
// Applies a supplemental hash function to defend against poor quality hash.
// The mixing is done on the unsigned bit pattern: the low 32 bits of each product are the
// same whether the operand is treated as signed or unsigned.
private static int Spread(int x)
{
    uint h = (uint)x;

    h ^= h >> 17;
    h *= 0xed5ad4bbU;
    h ^= h >> 11;
    h *= 0xac4c1b51U;
    h ^= h >> 15;

    return (int)h;
}
204+
205+
/// <summary>
/// Increments the 4-bit counter at position j within table element i, unless it is already 15.
/// </summary>
/// <param name="i">The index of the long within the table.</param>
/// <param name="j">The index of the 4-bit counter within that long.</param>
/// <returns>True when the counter was incremented; false when it was already saturated.</returns>
private bool IncrementAt(int i, int j)
{
    int shift = j * 4;
    long nibble = 0xfL << shift;
    long current = table[i];

    // All four bits set means the counter is at its maximum of 15; leave it alone.
    if ((current & nibble) == nibble)
    {
        return false;
    }

    table[i] = current + (1L << shift);
    return true;
}
218+
219+
/// <summary>
/// Halves every counter in the table and reduces the size counter to match.
/// </summary>
private void Reset()
{
    // Processed four longs per iteration (the loop is deliberately unrolled, as in the
    // original, which noted it is almost 2x faster).
    int odd0 = 0, odd1 = 0, odd2 = 0, odd3 = 0;
    long[] counters = table;

    for (int i = 0; i < counters.Length; i += 4)
    {
        long w0 = counters[i];
        long w1 = counters[i + 1];
        long w2 = counters[i + 2];
        long w3 = counters[i + 3];

        // OneMask keeps the lowest bit of each 4-bit counter, so this counts the odd counters.
        odd0 += BitOps.BitCount(w0 & OneMask);
        odd1 += BitOps.BitCount(w1 & OneMask);
        odd2 += BitOps.BitCount(w2 & OneMask);
        odd3 += BitOps.BitCount(w3 & OneMask);

        // Shift every counter right by one; ResetMask clears the bit that crossed over
        // from the neighbouring counter.
        counters[i] = (long)((ulong)w0 >> 1) & ResetMask;
        counters[i + 1] = (long)((ulong)w1 >> 1) & ResetMask;
        counters[i + 2] = (long)((ulong)w2 >> 1) & ResetMask;
        counters[i + 3] = (long)((ulong)w3 >> 1) & ResetMask;
    }

    int oddTotal = (odd0 + odd1) + (odd2 + odd3);

    size = (size - (oddTotal >> 2)) >> 1;
}
244+
245+
#if NET6_0_OR_GREATER
246+
/// <summary>
/// AVX2 frequency estimate: gathers the four table elements selected by the value's hash
/// in one instruction, extracts the 4-bit counter from each and returns the smallest.
/// </summary>
/// <param name="value">The value.</param>
/// <returns>The minimum of the four counters, in the range 0 to 15.</returns>
private unsafe int EstimateFrequencyAvx(T value)
{
    int blockHash = Spread(comparer.GetHashCode(value));
    int counterHash = Rehash(blockHash);
    int block = (blockHash & blockMask) << 3;

    // Lane i holds counterHash >> (8 * i), so each lane is driven by a different byte of the hash.
    Vector128<int> h = Vector128.Create(counterHash);
    h = Avx2.ShiftRightLogicalVariable(h.AsUInt32(), Vector128.Create(0U, 8U, 16U, 24U)).AsInt32();

    Vector128<int> index = Avx2.ShiftRightLogical(h, 1);
    index = Avx2.And(index, Vector128.Create(15)); // j - counter index
    Vector128<int> offset = Avx2.And(h, Vector128.Create(1));
    Vector128<int> blockOffset = Avx2.Add(Vector128.Create(block), offset); // i - table index
    blockOffset = Avx2.Add(blockOffset, Vector128.Create(0, 2, 4, 6)); // + (i << 1)

    // This method is only compiled under NET6_0_OR_GREATER, where the table address is
    // cached in tableAddr, so no 'fixed' statement is needed here.
    long* tablePtr = tableAddr;

    Vector256<long> tableVector = Avx2.GatherVector256(tablePtr, blockOffset, 8);

    // Counter index -> bit offset within the long (index * 4).
    index = Avx2.ShiftLeftLogical(index, 2);

    // Convert index from int to long via permute: interleave the four values with the
    // zeroed upper lanes to produce (i0, 0, i1, 0, i2, 0, i3, 0).
    Vector256<long> indexLong = Vector256.Create(index, Vector128<int>.Zero).AsInt64();
    Vector256<int> widenMask = Vector256.Create(0, 4, 1, 5, 2, 6, 3, 7);
    indexLong = Avx2.PermuteVar8x32(indexLong.AsInt32(), widenMask).AsInt64();

    // (table[i] >> offset) & 0xf for all four lanes.
    tableVector = Avx2.ShiftRightLogicalVariable(tableVector, indexLong.AsUInt64());
    tableVector = Avx2.And(tableVector, Vector256.Create(0xfL));

    // Pack the low 32 bits of each long into the lower 128 bits.
    Vector256<int> packMask = Vector256.Create(0, 2, 4, 6, 1, 3, 5, 7);
    Vector128<ushort> count = Avx2.PermuteVar8x32(tableVector.AsInt32(), packMask)
        .GetLower()
        .AsUInt16();

    // Set the zeroed high parts of each 32 bit value to ushort.MaxValue so that they
    // cannot win the horizontal minimum.
    count = Avx2.Blend(count, Vector128<ushort>.AllBitsSet, 0b10101010);

    return Avx2.MinHorizontal(count).GetElement(0);
}
292+
293+
/// <summary>
/// AVX2 increment: gathers the four table elements selected by the value's hash, computes
/// the per-lane increment (zero for counters already at 15) and writes the results back.
/// When at least one counter changed, advances the size counter and resets the sketch
/// once the sample size is reached.
/// </summary>
/// <param name="value">The value.</param>
private unsafe void IncrementAvx(T value)
{
    int blockHash = Spread(comparer.GetHashCode(value));
    int counterHash = Rehash(blockHash);
    int block = (blockHash & blockMask) << 3;

    // Lane i holds counterHash >> (8 * i), so each lane is driven by a different byte of the hash.
    Vector128<int> h = Vector128.Create(counterHash);
    h = Avx2.ShiftRightLogicalVariable(h.AsUInt32(), Vector128.Create(0U, 8U, 16U, 24U)).AsInt32();

    Vector128<int> index = Avx2.ShiftRightLogical(h, 1);
    index = Avx2.And(index, Vector128.Create(15)); // j - counter index
    Vector128<int> offset = Avx2.And(h, Vector128.Create(1));
    Vector128<int> blockOffset = Avx2.Add(Vector128.Create(block), offset); // i - table index
    blockOffset = Avx2.Add(blockOffset, Vector128.Create(0, 2, 4, 6)); // + (i << 1)

    // This method is only compiled under NET6_0_OR_GREATER, where the table address is
    // cached in tableAddr, so no 'fixed' statement is needed here.
    long* tablePtr = tableAddr;

    Vector256<long> tableVector = Avx2.GatherVector256(tablePtr, blockOffset, 8);

    // Counter index -> bit offset within the long (index * 4).
    index = Avx2.ShiftLeftLogical(index, 2);

    // Widen the four offsets from int to long by interleaving them with the zeroed
    // upper lanes: (o0, 0, o1, 0, o2, 0, o3, 0).
    Vector256<long> offsetLong = Vector256.Create(index, Vector128<int>.Zero).AsInt64();
    Vector256<int> widenMask = Vector256.Create(0, 4, 1, 5, 2, 6, 3, 7);
    offsetLong = Avx2.PermuteVar8x32(offsetLong.AsInt32(), widenMask).AsInt64();

    // mask = (0xfL << offset)
    Vector256<long> fifteen = Vector256.Create(0xfL);
    Vector256<long> mask = Avx2.ShiftLeftLogicalVariable(fifteen, offsetLong.AsUInt64());

    // (table[i] & mask) == mask: lanes are all-ones where the counter is already saturated.
    Vector256<long> saturated = Avx2.CompareEqual(Avx2.And(tableVector, mask), mask);

    // 1L << offset
    Vector256<long> inc = Avx2.ShiftLeftLogicalVariable(Vector256.Create(1L), offsetLong.AsUInt64());

    // Zero the increment for saturated lanes - first operand is NOT then AND result (order matters).
    inc = Avx2.AndNot(saturated, inc);

    // Match the scalar path, which ORs the four IncrementAt results: the add counts when
    // at least one counter was incremented, i.e. unless every lane was saturated
    // (all 32 byte sign bits set => MoveMask == -1).
    bool wasInc = Avx2.MoveMask(saturated.AsByte()) != -1;

    tablePtr[blockOffset.GetElement(0)] += inc.GetElement(0);
    tablePtr[blockOffset.GetElement(1)] += inc.GetElement(1);
    tablePtr[blockOffset.GetElement(2)] += inc.GetElement(2);
    tablePtr[blockOffset.GetElement(3)] += inc.GetElement(3);

    if (wasInc && (++size == sampleSize))
    {
        Reset();
    }
}
351+
#endif
352+
}
353+
}

BitFaster.Caching.Benchmarks/Lfu/SketchFrequency.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ public class SketchFrequency
2626

2727
private CmSketchCore<int, DisableHardwareIntrinsics> blockStd;
2828
private CmSketchNoPin<int, DetectIsa> blockAvxNoPin;
29+
private CmSketchPinNoOpt<int, DetectIsa> blockAvxPinNoOpt;
2930
private CmSketchCore<int, DetectIsa> blockAvx;
3031

3132
[Params(512, 1024, 32_768, 524_288, 8_388_608, 134_217_728)]
@@ -39,6 +40,7 @@ public void Setup()
3940

4041
blockStd = new CmSketchCore<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
4142
blockAvxNoPin = new CmSketchNoPin<int, DetectIsa>(Size, EqualityComparer<int>.Default);
43+
blockAvxPinNoOpt = new CmSketchPinNoOpt<int, DetectIsa>(Size, EqualityComparer<int>.Default);
4244
blockAvx = new CmSketchCore<int, DetectIsa>(Size, EqualityComparer<int>.Default);
4345
}
4446

@@ -82,6 +84,16 @@ public int FrequencyBlockAvxNotPinned()
8284
return count;
8385
}
8486

87+
[Benchmark(OperationsPerInvoke = iterations)]
public int FrequencyBlockAvxPinNotOpt()
{
    // Counts how often the estimate for i exceeds the estimate for i + 1; the result is
    // returned so the estimate calls cannot be optimized away.
    // NOTE(review): the second estimate is taken from blockAvx rather than blockAvxPinNoOpt,
    // so this benchmark measures a mix of two implementations - confirm this is intended.
    int greater = 0;

    for (int i = 0; i < iterations; i++)
    {
        if (blockAvxPinNoOpt.EstimateFrequency(i) > blockAvx.EstimateFrequency(i + 1))
        {
            greater++;
        }
    }

    return greater;
}
96+
8597
[Benchmark(OperationsPerInvoke = iterations)]
8698
public int FrequencyBlockAvxPinned()
8799
{

0 commit comments

Comments
 (0)