@@ -300,25 +300,16 @@ private unsafe int EstimateFrequencyAvx(T value)
300300
301301 Vector256 < ulong > indexLong = Avx2 . PermuteVar8x32 ( Vector256 . Create ( index , Vector128 < int > . Zero ) , Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ) . AsUInt64 ( ) ;
302302
303- #if NET
304303 long * tablePtr = tableAddr ;
305- #else
306- fixed ( long * tablePtr = table )
307- #endif
308- {
309- Vector128 < ushort > count = Avx2 . PermuteVar8x32 ( Avx2 . And ( Avx2 . ShiftRightLogicalVariable ( Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) , indexLong ) , Vector256 . Create ( 0xfL ) ) . AsInt32 ( ) , Vector256 . Create ( 0 , 2 , 4 , 6 , 1 , 3 , 5 , 7 ) )
310- . GetLower ( )
311- . AsUInt16 ( ) ;
312304
313- // set the zeroed high parts of the long value to ushort.Max
314- #if NET
315- count = Avx2 . Blend ( count , Vector128 < ushort > . AllBitsSet , 0b10101010 ) ;
316- #else
317- count = Avx2 . Blend ( count , Vector128 . Create ( ushort . MaxValue ) , 0b10101010 ) ;
318- #endif
305+ Vector128 < ushort > count = Avx2 . PermuteVar8x32 ( Avx2 . And ( Avx2 . ShiftRightLogicalVariable ( Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) , indexLong ) , Vector256 . Create ( 0xfL ) ) . AsInt32 ( ) , Vector256 . Create ( 0 , 2 , 4 , 6 , 1 , 3 , 5 , 7 ) )
306+ . GetLower ( )
307+ . AsUInt16 ( ) ;
319308
320- return Avx2 . MinHorizontal ( count ) . GetElement ( 0 ) ;
321- }
309+ // set the zeroed high parts of the long value to ushort.Max
310+ count = Avx2 . Blend ( count , Vector128 < ushort > . AllBitsSet , 0b10101010 ) ;
311+
312+ return Avx2 . MinHorizontal ( count ) . GetElement ( 0 ) ;
322313 }
323314
324315 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
@@ -335,29 +326,24 @@ private unsafe void IncrementAvx(T value)
335326 Vector256 < ulong > offsetLong = Avx2 . PermuteVar8x32 ( Vector256 . Create ( index , Vector128 < int > . Zero ) , Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ) . AsUInt64 ( ) ;
336327 Vector256 < long > mask = Avx2 . ShiftLeftLogicalVariable ( Vector256 . Create ( 0xfL ) , offsetLong ) ;
337328
338- #if NET
339329 long * tablePtr = tableAddr ;
340- #else
341- fixed ( long * tablePtr = table )
342- #endif
343- {
344- // Note masked is 'equal' - therefore use AndNot below
345- Vector256 < long > masked = Avx2 . CompareEqual ( Avx2 . And ( Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) , mask ) , mask ) ;
346330
347- // Mask to zero out non matches (add zero below) - first operand is NOT then AND result (order matters)
348- Vector256 < long > inc = Avx2 . AndNot ( masked , Avx2 . ShiftLeftLogicalVariable ( Vector256 . Create ( 1L ) , offsetLong ) ) ;
331+ // Note masked is 'equal' - therefore use AndNot below
332+ Vector256 < long > masked = Avx2 . CompareEqual ( Avx2 . And ( Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) , mask ) , mask ) ;
349333
350- bool wasInc = Avx2 . MoveMask ( Avx2 . CompareEqual ( masked . AsByte ( ) , Vector256 < byte > . Zero ) . AsByte ( ) ) == unchecked ( ( int ) ( 0b1111_1111_1111_1111_1111_1111_1111_1111 ) ) ;
334+ // Mask to zero out non matches (add zero below) - first operand is NOT then AND result (order matters)
335+ Vector256 < long > inc = Avx2 . AndNot ( masked , Avx2 . ShiftLeftLogicalVariable ( Vector256 . Create ( 1L ) , offsetLong ) ) ;
351336
352- tablePtr [ blockOffset . GetElement ( 0 ) ] += inc . GetElement ( 0 ) ;
353- tablePtr [ blockOffset . GetElement ( 1 ) ] += inc . GetElement ( 1 ) ;
354- tablePtr [ blockOffset . GetElement ( 2 ) ] += inc . GetElement ( 2 ) ;
355- tablePtr [ blockOffset . GetElement ( 3 ) ] += inc . GetElement ( 3 ) ;
337+ bool wasInc = Avx2 . MoveMask ( Avx2 . CompareEqual ( masked . AsByte ( ) , Vector256 < byte > . Zero ) . AsByte ( ) ) == unchecked ( ( int ) 0b1111_1111_1111_1111_1111_1111_1111_1111 ) ;
356338
357- if ( wasInc && ( ++ size == sampleSize ) )
358- {
359- Reset ( ) ;
360- }
339+ tablePtr [ blockOffset . GetElement ( 0 ) ] += inc . GetElement ( 0 ) ;
340+ tablePtr [ blockOffset . GetElement ( 1 ) ] += inc . GetElement ( 1 ) ;
341+ tablePtr [ blockOffset . GetElement ( 2 ) ] += inc . GetElement ( 2 ) ;
342+ tablePtr [ blockOffset . GetElement ( 3 ) ] += inc . GetElement ( 3 ) ;
343+
344+ if ( wasInc && ( ++ size == sampleSize ) )
345+ {
346+ Reset ( ) ;
361347 }
362348 }
363349#endif
0 commit comments