@@ -15,7 +15,7 @@ namespace zoo { namespace swar {
1515using u64 = uint64_t ;
1616using u32 = uint32_t ;
1717using u16 = uint16_t ;
18- using u8 = uint8_t ;
18+ using u8 = std:: uint8_t ; // now std::-qualified; the u64/u32/u16 aliases above still use the unqualified names
1919
2020template <int LogNBits>
2121constexpr uint64_t popcount (uint64_t a) noexcept {
@@ -58,7 +58,10 @@ struct SWAR {
5858 SignificantBitsCount = BitWidth - PaddingBitsCount,
5959 AllOnes = ~std::make_unsigned_t <T>{0 } >> PaddingBitsCount,
6060 LeastSignificantBit = meta::BitmaskMaker<T, std::make_unsigned_t <T>{1 }, NBits>::value,
61- MostSignificantBit = LeastSignificantBit << (NBits - 1 );
61+ MostSignificantBit = LeastSignificantBit << (NBits - 1 ),
62+ // Use LowerBits in favor of ~MostSignificantBit to not pollute
63+ // "don't care" bits when non-power-of-two bit lane sizes are supported
64+ LowerBits = MostSignificantBit - LeastSignificantBit;
6265
6366 SWAR () = default ;
6467 constexpr explicit SWAR (T v): m_v(v) {}
@@ -134,14 +137,16 @@ struct SWAR {
134137 // / We are not sure the optimizer would keep this mask around; if it had to recalculate it, that would be disastrous for performance.
135138 constexpr SWAR
136139 shiftIntraLaneLeft (int bitCount, SWAR protectiveMask) const noexcept {
137- return SWAR{(*this & protectiveMask).value () << bitCount};
140+ T shiftC = static_cast <T>(bitCount); // explicit cast: int -> T could be a narrowing conversion, which brace-initialization would reject
141+ auto V = (*this & protectiveMask).value (); // apply protectiveMask before shifting (per the docs, it clears bits that would cross the lane)
142+ return SWAR{static_cast <T>(V << shiftC)}; // cast the shifted value back to T (the shift may be evaluated in a promoted type)
138143 }
139144
140145 // / \param protectiveMask should clear the bits that would cross the lane
141146 // / \sa shiftIntraLaneLeft
142147 constexpr SWAR
143148 shiftIntraLaneRight (int bitCount, SWAR protectiveMask) const noexcept {
144- return SWAR{(*this & protectiveMask).value () >> bitCount};
149+ return SWAR{static_cast <T>((*this & protectiveMask).value () >> static_cast <T>(bitCount))}; // explicit casts, mirroring shiftIntraLaneLeft: T{bitCount} would be a narrowing list-initialization (ill-formed for a non-constant int and unsigned T), and the shift may be evaluated in a promoted type
145150 }
146151
147152 T m_v;
0 commit comments