Skip to content

Commit 87105f6

Browse files
committed
opt
1 parent 75481ff commit 87105f6

File tree

1 file changed

+25
-75
lines changed

1 file changed

+25
-75
lines changed

BitFaster.Caching/Lfu/CmSketchCore.cs

Lines changed: 25 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Diagnostics.CodeAnalysis;
4+
using System.Runtime.CompilerServices;
45

56

67
#if !NETSTANDARD2_0
@@ -347,20 +348,16 @@ private unsafe void IncrementAvx(T value)
347348
#endif
348349

349350
#if NET6_0_OR_GREATER
351+
[MethodImpl(MethodImplOptions.AggressiveOptimization | MethodImplOptions.AggressiveInlining)]
350352
private unsafe void IncrementArm(T value)
351353
{
352354
int blockHash = Spread(comparer.GetHashCode(value));
353355
int counterHash = Rehash(blockHash);
354356
int block = (blockHash & blockMask) << 3;
355357

356-
Vector128<int> h = Vector128.Create(counterHash);
357-
h = AdvSimd.ShiftArithmetic(h, Vector128.Create(0, -8, -16, -24));
358-
359-
Vector128<int> index = AdvSimd.ShiftRightLogical(h, 1);
360-
index = AdvSimd.And(index, Vector128.Create(15)); // j - counter index
361-
Vector128<int> offset = AdvSimd.And(h, Vector128.Create(1));
362-
Vector128<int> blockOffset = AdvSimd.Add(Vector128.Create(block), offset); // i - table index
363-
blockOffset = AdvSimd.Add(blockOffset, Vector128.Create(0, 2, 4, 6)); // + (i << 1)
358+
Vector128<int> h = AdvSimd.ShiftArithmetic(Vector128.Create(counterHash), Vector128.Create(0, -8, -16, -24));
359+
Vector128<int> index = AdvSimd.And(AdvSimd.ShiftRightLogical(h, 1), Vector128.Create(0xf));
360+
Vector128<int> blockOffset = AdvSimd.Add(AdvSimd.Add(Vector128.Create(block), AdvSimd.And(h, Vector128.Create(1))), Vector128.Create(0, 2, 4, 6));
364361

365362
fixed (long* tablePtr = table)
366363
{
@@ -369,47 +366,24 @@ private unsafe void IncrementArm(T value)
369366
int t2 = AdvSimd.Extract(blockOffset, 2);
370367
int t3 = AdvSimd.Extract(blockOffset, 3);
371368

372-
var ta0 = AdvSimd.LoadVector64(tablePtr + t0);
373-
var ta1 = AdvSimd.LoadVector64(tablePtr + t1);
374-
var ta2 = AdvSimd.LoadVector64(tablePtr + t2);
375-
var ta3 = AdvSimd.LoadVector64(tablePtr + t3);
376-
377-
Vector128<long> tableVectorA = Vector128.Create(ta0, ta1);
378-
Vector128<long> tableVectorB = Vector128.Create(ta2, ta3);
379-
380-
// TODO: VectorTableLookup
381-
//Vector128<long> tableVectorA = Vector128.Create(
382-
// tablePtr[t0],
383-
// tablePtr[t1]);
384-
//Vector128<long> tableVectorB = Vector128.Create(
385-
// tablePtr[t2],
386-
// tablePtr[t3]);
369+
Vector128<long> tableVectorA = Vector128.Create(AdvSimd.LoadVector64(tablePtr + t0), AdvSimd.LoadVector64(tablePtr + t1));
370+
Vector128<long> tableVectorB = Vector128.Create(AdvSimd.LoadVector64(tablePtr + t2), AdvSimd.LoadVector64(tablePtr + t3));
387371

388-
// j == index
389372
index = AdvSimd.ShiftLeftLogicalSaturate(index, 2);
390373

391-
Vector128<int> longOffA = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 0);
392-
longOffA = AdvSimd.Arm64.InsertSelectedScalar(longOffA, 2, index, 1);
393-
394-
Vector128<int> longOffB = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 2);
395-
longOffB = AdvSimd.Arm64.InsertSelectedScalar(longOffB, 2, index, 3);
374+
Vector128<int> longOffA = AdvSimd.Arm64.InsertSelectedScalar(AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 0), 2, index, 1);
375+
Vector128<int> longOffB = AdvSimd.Arm64.InsertSelectedScalar(AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 2), 2, index, 3);
396376

397377
Vector128<long> fifteen = Vector128.Create(0xfL);
398378
Vector128<long> maskA = AdvSimd.ShiftArithmetic(fifteen, longOffA.AsInt64());
399379
Vector128<long> maskB = AdvSimd.ShiftArithmetic(fifteen, longOffB.AsInt64());
400380

401-
Vector128<long> maskedA = AdvSimd.Arm64.CompareEqual(AdvSimd.And(tableVectorA, maskA), maskA);
402-
Vector128<long> maskedB = AdvSimd.Arm64.CompareEqual(AdvSimd.And(tableVectorB, maskB), maskB);
381+
Vector128<long> maskedA = AdvSimd.Not(AdvSimd.Arm64.CompareEqual(AdvSimd.And(tableVectorA, maskA), maskA));
382+
Vector128<long> maskedB = AdvSimd.Not(AdvSimd.Arm64.CompareEqual(AdvSimd.And(tableVectorB, maskB), maskB));
403383

404384
var one = Vector128.Create(1L);
405-
Vector128<long> incA = AdvSimd.ShiftArithmetic(one, longOffA.AsInt64());
406-
Vector128<long> incB = AdvSimd.ShiftArithmetic(one, longOffB.AsInt64());
407-
408-
maskedA = AdvSimd.Not(maskedA);
409-
maskedB = AdvSimd.Not(maskedB);
410-
411-
incA = AdvSimd.And(maskedA, incA);
412-
incB = AdvSimd.And(maskedB, incB);
385+
Vector128<long> incA = AdvSimd.And(maskedA, AdvSimd.ShiftArithmetic(one, longOffA.AsInt64()));
386+
Vector128<long> incB = AdvSimd.And(maskedB, AdvSimd.ShiftArithmetic(one, longOffB.AsInt64())); // gate B-lane increments with maskedB (not maskedA) so counters 2 and 3 are checked against their own saturation state
413387

414388
tablePtr[t0] += AdvSimd.Extract(incA, 0);
415389
tablePtr[t1] += AdvSimd.Extract(incA, 1);
@@ -428,57 +402,33 @@ private unsafe void IncrementArm(T value)
428402
}
429403
}
430404

405+
[MethodImpl(MethodImplOptions.AggressiveOptimization | MethodImplOptions.AggressiveInlining)]
431406
private unsafe int EstimateFrequencyArm(T value)
432407
{
433408
int blockHash = Spread(comparer.GetHashCode(value));
434409
int counterHash = Rehash(blockHash);
435410
int block = (blockHash & blockMask) << 3;
436411

437-
Vector128<int> h = Vector128.Create(counterHash);
438-
h = AdvSimd.ShiftArithmetic(h, Vector128.Create(0, -8, -16, -24));
439-
440-
Vector128<int> index = AdvSimd.ShiftRightLogical(h, 1);
441-
442-
index = AdvSimd.And(index, Vector128.Create(0xf)); // j - counter index
443-
Vector128<int> offset = AdvSimd.And(h, Vector128.Create(1));
444-
Vector128<int> blockOffset = AdvSimd.Add(Vector128.Create(block), offset); // i - table index
445-
blockOffset = AdvSimd.Add(blockOffset, Vector128.Create(0, 2, 4, 6)); // + (i << 1)
412+
Vector128<int> h = AdvSimd.ShiftArithmetic(Vector128.Create(counterHash), Vector128.Create(0, -8, -16, -24));
413+
Vector128<int> index = AdvSimd.And(AdvSimd.ShiftRightLogical(h, 1), Vector128.Create(0xf));
414+
Vector128<int> blockOffset = AdvSimd.Add(AdvSimd.Add(Vector128.Create(block), AdvSimd.And(h, Vector128.Create(1))), Vector128.Create(0, 2, 4, 6));
446415

447416
fixed (long* tablePtr = table)
448417
{
449-
// TODO: VectorTableLookup
450-
Vector128<long> tableVectorA = Vector128.Create(
451-
tablePtr[AdvSimd.Extract(blockOffset, 0)],
452-
tablePtr[AdvSimd.Extract(blockOffset, 1)]);
453-
Vector128<long> tableVectorB = Vector128.Create(
454-
tablePtr[AdvSimd.Extract(blockOffset, 2)],
455-
tablePtr[AdvSimd.Extract(blockOffset, 3)]);
418+
Vector128<long> tableVectorA = Vector128.Create(AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 0)), AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 1)));
419+
Vector128<long> tableVectorB = Vector128.Create(AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 2)), AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 3)));
456420

457-
// j == index
458421
index = AdvSimd.ShiftLeftLogicalSaturate(index, 2);
459422

460-
Vector128<int> indexA = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 0);
461-
indexA = AdvSimd.Arm64.InsertSelectedScalar(indexA, 2, index, 1);
462-
463-
Vector128<int> indexB = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 2);
464-
indexB = AdvSimd.Arm64.InsertSelectedScalar(indexB, 2, index, 3);
465-
466-
indexA = AdvSimd.Negate(indexA);
467-
indexB = AdvSimd.Negate(indexB);
468-
469-
Vector128<long> a = AdvSimd.ShiftArithmetic(tableVectorA, indexA.AsInt64());
470-
Vector128<long> b = AdvSimd.ShiftArithmetic(tableVectorB, indexB.AsInt64());
423+
Vector128<int> indexA = AdvSimd.Negate(AdvSimd.Arm64.InsertSelectedScalar(AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 0), 2, index, 1));
424+
Vector128<int> indexB = AdvSimd.Negate(AdvSimd.Arm64.InsertSelectedScalar(AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, index, 2), 2, index, 3));
471425

472426
var fifteen = Vector128.Create(0xfL);
473-
a = AdvSimd.And(a, fifteen);
474-
b = AdvSimd.And(b, fifteen);
475-
476-
// TODO: VectorTableLookup
477-
//Vector128<int> x = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, a.AsInt32(), 0);
478-
//x = AdvSimd.Arm64.InsertSelectedScalar(x, 1, a.AsInt32(), 2);
479-
//x = AdvSimd.Arm64.InsertSelectedScalar(x, 2, b.AsInt32(), 0);
480-
//x = AdvSimd.Arm64.InsertSelectedScalar(x, 3, b.AsInt32(), 2);
427+
Vector128<long> a = AdvSimd.And(AdvSimd.ShiftArithmetic(tableVectorA, indexA.AsInt64()), fifteen);
428+
Vector128<long> b = AdvSimd.And(AdvSimd.ShiftArithmetic(tableVectorB, indexB.AsInt64()), fifteen);
481429

430+
// Before: < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F >
431+
// After: < 0, 1, 2, 3, 8, 9, A, B, 4, 5, 6, 7, C, D, E, F >
482432
var min = AdvSimd.Arm64.VectorTableLookup(a.AsByte(), Vector128.Create(0x0B0A090803020100, 0xFFFFFFFFFFFFFFFF).AsByte());
483433
min = AdvSimd.Arm64.VectorTableLookupExtension(min, b.AsByte(), Vector128.Create(0xFFFFFFFFFFFFFFFF, 0x0B0A090803020100).AsByte());
484434

0 commit comments

Comments
 (0)