@@ -56,7 +56,9 @@ struct SWAR {
5656 NSlots = Lanes,
5757 PaddingBitsCount = BitWidth % NBits,
5858 SignificantBitsCount = BitWidth - PaddingBitsCount,
59- AllOnes = ~std::make_unsigned_t <T>{0 } >> PaddingBitsCount;
59+ AllOnes = ~std::make_unsigned_t <T>{0 } >> PaddingBitsCount,
60+ LeastSignificantBit = meta::BitmaskMaker<T, std::make_unsigned_t <T>{1 }, NBits>::value,
61+ MostSignificantBit = LeastSignificantBit << (NBits - 1 );
6062
6163 SWAR () = default ;
6264 constexpr explicit SWAR (T v): m_v(v) {}
@@ -68,7 +70,7 @@ struct SWAR {
6870 X (SWAR, ~)
6971 // constexpr SWAR operator~() const noexcept { return SWAR{~m_v}; }
7072 #define SWAR_BINARY_OPERATORS_X_LIST \
71- X (SWAR, &) X(SWAR, ^) X(SWAR, |) X(SWAR, -) X(SWAR, +) X(SWAR, *)
73+ X (SWAR, &) X(SWAR, ^) X(SWAR, |) X(SWAR, -) X(SWAR, +)
7274
7375 #define X (rt, op ) constexpr rt operator op () const noexcept { return rt (op m_v); }
7476 SWAR_UNARY_OPERATORS_X_LIST
@@ -106,6 +108,17 @@ struct SWAR {
106108 return SWAR (m_v | (T (1 ) << (index * NBits + bit)));
107109 }
108110
111+ constexpr auto blitElement (int index, T value) const noexcept {
112+ auto elementMask = ((T (1 ) << NBits) - 1 ) << (index * NBits);
113+ return SWAR ((m_v & ~elementMask) | (value << (index * NBits)));
114+ }
115+
116+ constexpr SWAR blitElement (int index, SWAR other) const noexcept {
117+ constexpr auto OneElementMask = SWAR (~(~T (0 ) << NBits));
118+ auto IsolationMask = OneElementMask.shiftLanesLeft (index);
119+ return (*this & ~IsolationMask) | (other & IsolationMask);
120+ }
121+
109122 constexpr SWAR shiftLanesLeft (int laneCount) const noexcept {
110123 return SWAR (value () << (NBits * laneCount));
111124 }
@@ -114,15 +127,21 @@ struct SWAR {
114127 return SWAR (value () >> (NBits * laneCount));
115128 }
116129
117- constexpr auto blitElement (int index, T value) const noexcept {
118- auto elementMask = ((T (1 ) << NBits) - 1 ) << (index * NBits);
119- return SWAR ((m_v & ~elementMask) | (value << (index * NBits)));
130+ // / \brief as the name suggests
131+ // / \param protectiveMask should clear the bits that would cross the lane.
132+ // / The bits that will be cleared are directly related to the count of shifts, it is natural to maintain
133+ // / the protective mask by the caller, otherwise, the mask will be computed on all invocations.
134+ // / We are not sure the optimizer would maintain this mask somewhere, if it was to recalculate it it would be disastrous for performance.
135+ constexpr SWAR
136+ shiftIntraLaneLeft (int bitCount, SWAR protectiveMask) const noexcept {
137+ return SWAR{(*this & protectiveMask).value () << bitCount};
120138 }
121139
122- constexpr SWAR blitElement (int index, SWAR other) const noexcept {
123- constexpr auto OneElementMask = SWAR (~(~T (0 ) << NBits));
124- auto IsolationMask = OneElementMask.shiftLanesLeft (index);
125- return (*this & ~IsolationMask) | (other & IsolationMask);
140+ // / \param protectiveMask should clear the bits that would cross the lane
141+ // / \sa shiftIntraLaneLeft
142+ constexpr SWAR
143+ shiftIntraLaneRight (int bitCount, SWAR protectiveMask) const noexcept {
144+ return SWAR{(*this & protectiveMask).value () >> bitCount};
126145 }
127146
128147 T m_v;
@@ -299,7 +318,7 @@ constexpr auto broadcast(SWAR<NBits, T> v) {
299318// / BooleanSWAR treats the MSB of each SWAR lane as the boolean associated with that lane.
300319template <int NBits, typename T>
301320struct BooleanSWAR : SWAR<NBits, T> {
302- // Booleanness is stored in MSB of a given swar.
321+ // Booleanness is stored in the MSBs
303322 static constexpr auto MaskLaneMSB =
304323 broadcast<NBits, T>(SWAR<NBits, T>(T(1 ) << (NBits -1 )));
305324 constexpr explicit BooleanSWAR (T v): SWAR<NBits, T>(v) {}
0 commit comments