66
77#include < type_traits>
88
9+ #ifdef _MSC_VER
10+ #include < iso646.h>
11+ #endif
12+
913namespace zoo { namespace swar {
1014
1115using u64 = uint64_t ;
@@ -27,13 +31,14 @@ constexpr uint64_t popcount(uint64_t a) noexcept {
2731// / Index into the bits of the type T that contains the MSB.
2832template <typename T>
2933constexpr std::make_unsigned_t <T> msbIndex (T v) noexcept {
30- return 8 * sizeof (T) - 1 - __builtin_clzll (v);
34+ return meta::logFloor (v);
3135}
3236
3337// / Index into the bits of the type T that contains the LSB.
3438template <typename T>
3539constexpr std::make_unsigned_t <T> lsbIndex (T v) noexcept {
36- return __builtin_ctzll (v) + 1 ;
40+ // ~v & (v - 1) turns on all trailing zeroes, zeroes the rest
41+ return meta::logFloor (1 + (~v & (v - 1 )));
3742}
3843
3944// / Core abstraction around SIMD Within A Register (SWAR). Specifies 'lanes'
@@ -44,12 +49,14 @@ constexpr std::make_unsigned_t<T> lsbIndex(T v) noexcept {
4449template <int NBits_, typename T = uint64_t >
4550struct SWAR {
4651 using type = T;
47- constexpr static inline auto NBits = NBits_;
48- constexpr static inline auto Lanes = sizeof (T) * 8 / NBits;
49- constexpr static inline auto NSlots = Lanes;
50- constexpr static T BitMod = sizeof (T)*8 % NBits;
51- constexpr static T ValidBitsCount = sizeof (T)*8 - BitMod;
52- constexpr static T AllOnes = (BitMod == 0 ) ? ~(T(0 )) : ((T(1 ) << ValidBitsCount) -1 );
52+ constexpr static inline std::make_unsigned_t <T>
53+ NBits = NBits_,
54+ BitWidth = sizeof (T) * 8 ,
55+ Lanes = BitWidth / NBits,
56+ NSlots = Lanes,
57+ PaddingBitsCount = BitWidth % NBits,
58+ SignificantBitsCount = BitWidth - PaddingBitsCount,
59+ AllOnes = ~std::make_unsigned_t <T>{0 } >> PaddingBitsCount;
5360
5461 SWAR () = default ;
5562 constexpr explicit SWAR (T v): m_v(v) {}
@@ -92,13 +99,21 @@ struct SWAR {
9299
93100 // / The SWAR lane index that contains the MSB. It is not the bit index of the MSB.
94101 // / IE: 4 bit wide 32 bit SWAR: 0x0040'0000 will return 5, not 22 (0 indexed).
95- constexpr int top () const noexcept { return msbIndex (m_v) / NBits; }
96- constexpr int lsbIndex () const noexcept { return __builtin_ctzll (m_v) / NBits; }
102+ constexpr auto top () const noexcept { return msbIndex (m_v) / NBits; }
103+ constexpr auto lsbIndex () const noexcept { return swar::lsbIndex (m_v) / NBits; }
97104
98105 constexpr SWAR setBit (int index, int bit) const noexcept {
99106 return SWAR (m_v | (T (1 ) << (index * NBits + bit)));
100107 }
101108
109+ constexpr SWAR shiftLanesLeft (int laneCount) const noexcept {
110+ return SWAR (value () << (NBits * laneCount));
111+ }
112+
113+ constexpr SWAR shiftLanesRight (int laneCount) const noexcept {
114+ return SWAR (value () >> (NBits * laneCount));
115+ }
116+
102117 constexpr auto blitElement (int index, T value) const noexcept {
103118 auto elementMask = ((T (1 ) << NBits) - 1 ) << (index * NBits);
104119 return SWAR ((m_v & ~elementMask) | (value << (index * NBits)));
@@ -110,14 +125,6 @@ struct SWAR {
110125 return (*this & ~IsolationMask) | (other & IsolationMask);
111126 }
112127
113- constexpr SWAR shiftLanesLeft (int laneCount) const noexcept {
114- return SWAR (value () << (NBits * laneCount));
115- }
116-
117- constexpr SWAR shiftLanesRight (int laneCount) const noexcept {
118- return SWAR (value () >> (NBits * laneCount));
119- }
120-
121128 T m_v;
122129};
123130
0 commit comments