11using System ;
22using System . Collections . Generic ;
33using System . Diagnostics . CodeAnalysis ;
4+ using System . Runtime . CompilerServices ;
45
56
67#if ! NETSTANDARD2_0
@@ -347,20 +348,16 @@ private unsafe void IncrementAvx(T value)
347348#endif
348349
349350#if NET6_0_OR_GREATER
351+ [ MethodImpl ( MethodImplOptions . AggressiveOptimization | MethodImplOptions . AggressiveInlining ) ]
350352 private unsafe void IncrementArm ( T value )
351353 {
352354 int blockHash = Spread ( comparer . GetHashCode ( value ) ) ;
353355 int counterHash = Rehash ( blockHash ) ;
354356 int block = ( blockHash & blockMask ) << 3 ;
355357
356- Vector128 < int > h = Vector128 . Create ( counterHash ) ;
357- h = AdvSimd . ShiftArithmetic ( h , Vector128 . Create ( 0 , - 8 , - 16 , - 24 ) ) ;
358-
359- Vector128 < int > index = AdvSimd . ShiftRightLogical ( h , 1 ) ;
360- index = AdvSimd . And ( index , Vector128 . Create ( 15 ) ) ; // j - counter index
361- Vector128 < int > offset = AdvSimd . And ( h , Vector128 . Create ( 1 ) ) ;
362- Vector128 < int > blockOffset = AdvSimd . Add ( Vector128 . Create ( block ) , offset ) ; // i - table index
363- blockOffset = AdvSimd . Add ( blockOffset , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ; // + (i << 1)
358+ Vector128 < int > h = AdvSimd . ShiftArithmetic ( Vector128 . Create ( counterHash ) , Vector128 . Create ( 0 , - 8 , - 16 , - 24 ) ) ;
359+ Vector128 < int > index = AdvSimd . And ( AdvSimd . ShiftRightLogical ( h , 1 ) , Vector128 . Create ( 0xf ) ) ;
360+ Vector128 < int > blockOffset = AdvSimd . Add ( AdvSimd . Add ( Vector128 . Create ( block ) , AdvSimd . And ( h , Vector128 . Create ( 1 ) ) ) , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ;
364361
365362 fixed ( long * tablePtr = table )
366363 {
@@ -369,47 +366,24 @@ private unsafe void IncrementArm(T value)
369366 int t2 = AdvSimd . Extract ( blockOffset , 2 ) ;
370367 int t3 = AdvSimd . Extract ( blockOffset , 3 ) ;
371368
372- var ta0 = AdvSimd . LoadVector64 ( tablePtr + t0 ) ;
373- var ta1 = AdvSimd . LoadVector64 ( tablePtr + t1 ) ;
374- var ta2 = AdvSimd . LoadVector64 ( tablePtr + t2 ) ;
375- var ta3 = AdvSimd . LoadVector64 ( tablePtr + t3 ) ;
376-
377- Vector128 < long > tableVectorA = Vector128 . Create ( ta0 , ta1 ) ;
378- Vector128 < long > tableVectorB = Vector128 . Create ( ta2 , ta3 ) ;
379-
380- // TODO: VectorTableLookup
381- //Vector128<long> tableVectorA = Vector128.Create(
382- // tablePtr[t0],
383- // tablePtr[t1]);
384- //Vector128<long> tableVectorB = Vector128.Create(
385- // tablePtr[t2],
386- // tablePtr[t3]);
369+ Vector128 < long > tableVectorA = Vector128 . Create ( AdvSimd . LoadVector64 ( tablePtr + t0 ) , AdvSimd . LoadVector64 ( tablePtr + t1 ) ) ;
370+ Vector128 < long > tableVectorB = Vector128 . Create ( AdvSimd . LoadVector64 ( tablePtr + t2 ) , AdvSimd . LoadVector64 ( tablePtr + t3 ) ) ;
387371
388- // j == index
389372 index = AdvSimd . ShiftLeftLogicalSaturate ( index , 2 ) ;
390373
391- Vector128 < int > longOffA = AdvSimd . Arm64 . InsertSelectedScalar ( Vector128 < int > . Zero , 0 , index , 0 ) ;
392- longOffA = AdvSimd . Arm64 . InsertSelectedScalar ( longOffA , 2 , index , 1 ) ;
393-
394- Vector128 < int > longOffB = AdvSimd . Arm64 . InsertSelectedScalar ( Vector128 < int > . Zero , 0 , index , 2 ) ;
395- longOffB = AdvSimd . Arm64 . InsertSelectedScalar ( longOffB , 2 , index , 3 ) ;
374+ Vector128 < int > longOffA = AdvSimd . Arm64 . InsertSelectedScalar ( AdvSimd . Arm64 . InsertSelectedScalar ( Vector128 < int > . Zero , 0 , index , 0 ) , 2 , index , 1 ) ;
375+ Vector128 < int > longOffB = AdvSimd . Arm64 . InsertSelectedScalar ( AdvSimd . Arm64 . InsertSelectedScalar ( Vector128 < int > . Zero , 0 , index , 2 ) , 2 , index , 3 ) ;
396376
397377 Vector128 < long > fifteen = Vector128 . Create ( 0xfL ) ;
398378 Vector128 < long > maskA = AdvSimd . ShiftArithmetic ( fifteen , longOffA . AsInt64 ( ) ) ;
399379 Vector128 < long > maskB = AdvSimd . ShiftArithmetic ( fifteen , longOffB . AsInt64 ( ) ) ;
400380
401- Vector128 < long > maskedA = AdvSimd . Arm64 . CompareEqual ( AdvSimd . And ( tableVectorA , maskA ) , maskA ) ;
402- Vector128 < long > maskedB = AdvSimd . Arm64 . CompareEqual ( AdvSimd . And ( tableVectorB , maskB ) , maskB ) ;
381+ Vector128 < long > maskedA = AdvSimd . Not ( AdvSimd . Arm64 . CompareEqual ( AdvSimd . And ( tableVectorA , maskA ) , maskA ) ) ;
382+ Vector128 < long > maskedB = AdvSimd . Not ( AdvSimd . Arm64 . CompareEqual ( AdvSimd . And ( tableVectorB , maskB ) , maskB ) ) ;
403383
404384 var one = Vector128 . Create ( 1L ) ;
405- Vector128 < long > incA = AdvSimd . ShiftArithmetic ( one , longOffA . AsInt64 ( ) ) ;
406- Vector128 < long > incB = AdvSimd . ShiftArithmetic ( one , longOffB . AsInt64 ( ) ) ;
407-
408- maskedA = AdvSimd . Not ( maskedA ) ;
409- maskedB = AdvSimd . Not ( maskedB ) ;
410-
411- incA = AdvSimd . And ( maskedA , incA ) ;
412- incB = AdvSimd . And ( maskedB , incB ) ;
385+ Vector128 < long > incA = AdvSimd . And ( maskedA , AdvSimd . ShiftArithmetic ( one , longOffA . AsInt64 ( ) ) ) ;
386+ Vector128 < long > incB = AdvSimd . And ( maskedB , AdvSimd . ShiftArithmetic ( one , longOffB . AsInt64 ( ) ) ) ; // gate the B-lane increments with maskedB (saturation check of tableVectorB), not maskedA
413387
414388 tablePtr [ t0 ] += AdvSimd . Extract ( incA , 0 ) ;
415389 tablePtr [ t1 ] += AdvSimd . Extract ( incA , 1 ) ;
@@ -428,57 +402,33 @@ private unsafe void IncrementArm(T value)
428402 }
429403 }
430404
405+ [ MethodImpl ( MethodImplOptions . AggressiveOptimization | MethodImplOptions . AggressiveInlining ) ]
431406 private unsafe int EstimateFrequencyArm ( T value )
432407 {
433408 int blockHash = Spread ( comparer . GetHashCode ( value ) ) ;
434409 int counterHash = Rehash ( blockHash ) ;
435410 int block = ( blockHash & blockMask ) << 3 ;
436411
437- Vector128 < int > h = Vector128 . Create ( counterHash ) ;
438- h = AdvSimd . ShiftArithmetic ( h , Vector128 . Create ( 0 , - 8 , - 16 , - 24 ) ) ;
439-
440- Vector128 < int > index = AdvSimd . ShiftRightLogical ( h , 1 ) ;
441-
442- index = AdvSimd . And ( index , Vector128 . Create ( 0xf ) ) ; // j - counter index
443- Vector128 < int > offset = AdvSimd . And ( h , Vector128 . Create ( 1 ) ) ;
444- Vector128 < int > blockOffset = AdvSimd . Add ( Vector128 . Create ( block ) , offset ) ; // i - table index
445- blockOffset = AdvSimd . Add ( blockOffset , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ; // + (i << 1)
412+ Vector128 < int > h = AdvSimd . ShiftArithmetic ( Vector128 . Create ( counterHash ) , Vector128 . Create ( 0 , - 8 , - 16 , - 24 ) ) ;
413+ Vector128 < int > index = AdvSimd . And ( AdvSimd . ShiftRightLogical ( h , 1 ) , Vector128 . Create ( 0xf ) ) ;
414+ Vector128 < int > blockOffset = AdvSimd . Add ( AdvSimd . Add ( Vector128 . Create ( block ) , AdvSimd . And ( h , Vector128 . Create ( 1 ) ) ) , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ;
446415
447416 fixed ( long * tablePtr = table )
448417 {
449- // TODO: VectorTableLookup
450- Vector128 < long > tableVectorA = Vector128 . Create (
451- tablePtr [ AdvSimd . Extract ( blockOffset , 0 ) ] ,
452- tablePtr [ AdvSimd . Extract ( blockOffset , 1 ) ] ) ;
453- Vector128 < long > tableVectorB = Vector128 . Create (
454- tablePtr [ AdvSimd . Extract ( blockOffset , 2 ) ] ,
455- tablePtr [ AdvSimd . Extract ( blockOffset , 3 ) ] ) ;
418+ Vector128 < long > tableVectorA = Vector128 . Create ( AdvSimd . LoadVector64 ( tablePtr + AdvSimd . Extract ( blockOffset , 0 ) ) , AdvSimd . LoadVector64 ( tablePtr + AdvSimd . Extract ( blockOffset , 1 ) ) ) ;
419+ Vector128 < long > tableVectorB = Vector128 . Create ( AdvSimd . LoadVector64 ( tablePtr + AdvSimd . Extract ( blockOffset , 2 ) ) , AdvSimd . LoadVector64 ( tablePtr + AdvSimd . Extract ( blockOffset , 3 ) ) ) ;
456420
457- // j == index
458421 index = AdvSimd . ShiftLeftLogicalSaturate ( index , 2 ) ;
459422
460- Vector128 < int > indexA = AdvSimd . Arm64 . InsertSelectedScalar ( Vector128 < int > . Zero , 0 , index , 0 ) ;
461- indexA = AdvSimd . Arm64 . InsertSelectedScalar ( indexA , 2 , index , 1 ) ;
462-
463- Vector128 < int > indexB = AdvSimd . Arm64 . InsertSelectedScalar ( Vector128 < int > . Zero , 0 , index , 2 ) ;
464- indexB = AdvSimd . Arm64 . InsertSelectedScalar ( indexB , 2 , index , 3 ) ;
465-
466- indexA = AdvSimd . Negate ( indexA ) ;
467- indexB = AdvSimd . Negate ( indexB ) ;
468-
469- Vector128 < long > a = AdvSimd . ShiftArithmetic ( tableVectorA , indexA . AsInt64 ( ) ) ;
470- Vector128 < long > b = AdvSimd . ShiftArithmetic ( tableVectorB , indexB . AsInt64 ( ) ) ;
423+ Vector128 < int > indexA = AdvSimd . Negate ( AdvSimd . Arm64 . InsertSelectedScalar ( AdvSimd . Arm64 . InsertSelectedScalar ( Vector128 < int > . Zero , 0 , index , 0 ) , 2 , index , 1 ) ) ;
424+ Vector128 < int > indexB = AdvSimd . Negate ( AdvSimd . Arm64 . InsertSelectedScalar ( AdvSimd . Arm64 . InsertSelectedScalar ( Vector128 < int > . Zero , 0 , index , 2 ) , 2 , index , 3 ) ) ;
471425
472426 var fifteen = Vector128 . Create ( 0xfL ) ;
473- a = AdvSimd . And ( a , fifteen ) ;
474- b = AdvSimd . And ( b , fifteen ) ;
475-
476- // TODO: VectorTableLookup
477- //Vector128<int> x = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, a.AsInt32(), 0);
478- //x = AdvSimd.Arm64.InsertSelectedScalar(x, 1, a.AsInt32(), 2);
479- //x = AdvSimd.Arm64.InsertSelectedScalar(x, 2, b.AsInt32(), 0);
480- //x = AdvSimd.Arm64.InsertSelectedScalar(x, 3, b.AsInt32(), 2);
427+ Vector128 < long > a = AdvSimd . And ( AdvSimd . ShiftArithmetic ( tableVectorA , indexA . AsInt64 ( ) ) , fifteen ) ;
428+ Vector128 < long > b = AdvSimd . And ( AdvSimd . ShiftArithmetic ( tableVectorB , indexB . AsInt64 ( ) ) , fifteen ) ;
481429
430+ // Before: < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F >
431+ // After: < 0, 1, 2, 3, 8, 9, A, B, 4, 5, 6, 7, C, D, E, F >
482432 var min = AdvSimd . Arm64 . VectorTableLookup ( a . AsByte ( ) , Vector128 . Create ( 0x0B0A090803020100 , 0xFFFFFFFFFFFFFFFF ) . AsByte ( ) ) ;
483433 min = AdvSimd . Arm64 . VectorTableLookupExtension ( min , b . AsByte ( ) , Vector128 . Create ( 0xFFFFFFFFFFFFFFFF , 0x0B0A090803020100 ) . AsByte ( ) ) ;
484434
0 commit comments