Skip to content

Commit 0a9bcff

Browse files
author
Alex Peck
committed
unroll freq
1 parent 874a515 commit 0a9bcff

File tree

2 files changed

+38
-13
lines changed

2 files changed

+38
-13
lines changed

BitFaster.Caching.Benchmarks/Lfu/SketchFrequency.cs

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,8 @@ public class SketchFrequency
2424
private CmSketchFlat<int, DisableHardwareIntrinsics> flatStd;
2525
private CmSketchFlat<int, DetectIsa> flatAvx;
2626

27-
private CmSketchCore<int, DisableHardwareIntrinsics> blockStd;
27+
private CmSketchLooped<int, DisableHardwareIntrinsics> blockStdNoUnroll;
28+
private CmSketchCore<int, DisableHardwareIntrinsics> blockStdUnroll;
2829
private CmSketchNoPin<int, DetectIsa> blockAvxNoPin;
2930
private CmSketchCore<int, DetectIsa> blockAvx;
3031

@@ -37,7 +38,8 @@ public void Setup()
3738
flatStd = new CmSketchFlat<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
3839
flatAvx = new CmSketchFlat<int, DetectIsa>(Size, EqualityComparer<int>.Default);
3940

40-
blockStd = new CmSketchCore<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
41+
blockStdNoUnroll = new CmSketchLooped<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
42+
blockStdUnroll = new CmSketchCore<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
4143
blockAvxNoPin = new CmSketchNoPin<int, DetectIsa>(Size, EqualityComparer<int>.Default);
4244
blockAvx = new CmSketchCore<int, DetectIsa>(Size, EqualityComparer<int>.Default);
4345
}
@@ -67,7 +69,17 @@ public int FrequencyBlock()
6769
{
6870
int count = 0;
6971
for (int i = 0; i < iterations; i++)
70-
count += blockStd.EstimateFrequency(i) > blockStd.EstimateFrequency(i + 1) ? 1 : 0;
72+
count += blockStdNoUnroll.EstimateFrequency(i) > blockStdNoUnroll.EstimateFrequency(i + 1) ? 1 : 0;
73+
74+
return count;
75+
}
76+
77+
[Benchmark(OperationsPerInvoke = iterations)]
78+
public int FrequencyBlockUnroll()
79+
{
80+
int count = 0;
81+
for (int i = 0; i < iterations; i++)
82+
count += blockStdUnroll.EstimateFrequency(i) > blockStdUnroll.EstimateFrequency(i + 1) ? 1 : 0;
7183

7284
return count;
7385
}

BitFaster.Caching/Lfu/CmSketchCore.cs

Lines changed: 23 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#if !NETSTANDARD2_0
99
using System.Runtime.Intrinsics;
1010
using System.Runtime.Intrinsics.X86;
11+
1112
#endif
1213

1314
#if NET6_0_OR_GREATER
@@ -169,19 +170,31 @@ private void EnsureCapacity(long maximumSize)
169170

170171
private unsafe int EstimateFrequencyStd(T value)
171172
{
172-
var count = stackalloc int[4];
173173
int blockHash = Spread(comparer.GetHashCode(value));
174174
int counterHash = Rehash(blockHash);
175175
int block = (blockHash & blockMask) << 3;
176176

177-
for (int i = 0; i < 4; i++)
178-
{
179-
int h = (int)((uint)counterHash >> (i << 3));
180-
int index = (h >> 1) & 15;
181-
int offset = h & 1;
182-
count[i] = (int)(((ulong)table[block + offset + (i << 1)] >> (index << 2)) & 0xfL);
183-
}
184-
return Math.Min(Math.Min(count[0], count[1]), Math.Min(count[2], count[3]));
177+
int h0 = counterHash;
178+
int h1 = counterHash >>> 8;
179+
int h2 = counterHash >>> 16;
180+
int h3 = counterHash >>> 24;
181+
182+
int index0 = (h0 >>> 1) & 15;
183+
int index1 = (h1 >>> 1) & 15;
184+
int index2 = (h2 >>> 1) & 15;
185+
int index3 = (h3 >>> 1) & 15;
186+
187+
int slot0 = block + (h0 & 1);
188+
int slot1 = block + (h1 & 1) + 2;
189+
int slot2 = block + (h2 & 1) + 4;
190+
int slot3 = block + (h3 & 1) + 6;
191+
192+
int count0 = (int)((table[slot0] >>> (index0 << 2)) & 0xfL);
193+
int count1 = (int)((table[slot1] >>> (index1 << 2)) & 0xfL);
194+
int count2 = (int)((table[slot2] >>> (index2 << 2)) & 0xfL);
195+
int count3 = (int)((table[slot3] >>> (index3 << 2)) & 0xfL);
196+
197+
return Math.Min(Math.Min(count0, count1), Math.Min(count2, count3));
185198
}
186199

187200
private unsafe void IncrementStd(T value)
@@ -190,7 +203,7 @@ private unsafe void IncrementStd(T value)
190203
int counterHash = Rehash(blockHash);
191204
int block = (blockHash & blockMask) << 3;
192205

193-
// Loop unrolling improves throughput by 10m ops/s
206+
// Loop unrolling improves throughput
194207
int h0 = counterHash;
195208
int h1 = counterHash >>> 8;
196209
int h2 = counterHash >>> 16;

0 commit comments

Comments
 (0)