Skip to content

Commit 8dabc00

Browse files
SWAR Multiplication via addition chains (#58)
* PR * Omission from prior commit * Draft of parity or parallel suffix * Restores building * Generalizes to exponentiation! * Clean, tested implementations * Addresses request to comment in PR review * re-enable windows build * Update test/swar/BasicOperations.cpp * Update inc/zoo/swar/SWAR.h * Update test/swar/BasicOperations.cpp * Update test/swar/BasicOperations.cpp * Update test/swar/BasicOperations.cpp * Update test/swar/BasicOperations.cpp * Update test/swar/BasicOperations.cpp * Update test/swar/BasicOperations.cpp * Update test/swar/BasicOperations.cpp --------- Co-authored-by: Eddie <eddie see email elsewhere> Co-authored-by: Scottbruceheart <105394870+Scottbruceheart@users.noreply.github.com>
1 parent dd61866 commit 8dabc00

File tree

2 files changed

+377
-10
lines changed

2 files changed

+377
-10
lines changed

inc/zoo/swar/SWAR.h

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ struct SWAR {
5656
NSlots = Lanes,
5757
PaddingBitsCount = BitWidth % NBits,
5858
SignificantBitsCount = BitWidth - PaddingBitsCount,
59-
AllOnes = ~std::make_unsigned_t<T>{0} >> PaddingBitsCount;
59+
AllOnes = ~std::make_unsigned_t<T>{0} >> PaddingBitsCount,
60+
LeastSignificantBit = meta::BitmaskMaker<T, std::make_unsigned_t<T>{1}, NBits>::value,
61+
MostSignificantBit = LeastSignificantBit << (NBits - 1);
6062

6163
SWAR() = default;
6264
constexpr explicit SWAR(T v): m_v(v) {}
@@ -68,7 +70,7 @@ struct SWAR {
6870
X(SWAR, ~)
6971
//constexpr SWAR operator~() const noexcept { return SWAR{~m_v}; }
7072
#define SWAR_BINARY_OPERATORS_X_LIST \
71-
X(SWAR, &) X(SWAR, ^) X(SWAR, |) X(SWAR, -) X(SWAR, +) X(SWAR, *)
73+
X(SWAR, &) X(SWAR, ^) X(SWAR, |) X(SWAR, -) X(SWAR, +)
7274

7375
#define X(rt, op) constexpr rt operator op() const noexcept { return rt(op m_v); }
7476
SWAR_UNARY_OPERATORS_X_LIST
@@ -106,6 +108,17 @@ struct SWAR {
106108
return SWAR(m_v | (T(1) << (index * NBits + bit)));
107109
}
108110

111+
/// \brief Returns a copy of this SWAR in which the lane at position `index` is replaced by `value`.
/// \param index lane position; the lane occupies the NBits bits that start at bit index * NBits
/// \param value new lane contents; it is shifted into place without being masked
/// \note NOTE(review): any bits of `value` above its low NBits are OR-ed into the higher lanes;
/// presumably callers only pass values that fit in NBits -- confirm against callers.
constexpr auto blitElement(int index, T value) const noexcept {
112+
// NBits consecutive ones, positioned over the target lane
auto elementMask = ((T(1) << NBits) - 1) << (index * NBits);
113+
// Clear the target lane, then OR the new value in at the lane's position
return SWAR((m_v & ~elementMask) | (value << (index * NBits)));
114+
}
115+
116+
/// \brief Returns a copy of this SWAR in which the lane at position `index` is taken from `other`.
/// \param index position of the lane to replace
/// \param other SWAR that supplies the replacement lane at that same position; its remaining lanes are masked out
constexpr SWAR blitElement(int index, SWAR other) const noexcept {
117+
// Ones in the NBits least significant bits, zeros elsewhere
constexpr auto OneElementMask = SWAR(~(~T(0) << NBits));
118+
// Move the single-lane mask over the target lane
auto IsolationMask = OneElementMask.shiftLanesLeft(index);
119+
// Keep everything of *this outside the target lane, and only the target lane of other
return (*this & ~IsolationMask) | (other & IsolationMask);
120+
}
121+
109122
/// \brief Shifts the whole underlying value left by laneCount lanes, that is, by NBits * laneCount bits.
/// \param laneCount number of lanes to shift by
constexpr SWAR shiftLanesLeft(int laneCount) const noexcept {
110123
return SWAR(value() << (NBits * laneCount));
111124
}
@@ -114,15 +127,21 @@ struct SWAR {
114127
return SWAR(value() >> (NBits * laneCount));
115128
}
116129

117-
constexpr auto blitElement(int index, T value) const noexcept {
118-
auto elementMask = ((T(1) << NBits) - 1) << (index * NBits);
119-
return SWAR((m_v & ~elementMask) | (value << (index * NBits)));
130+
/// \brief Applies protectiveMask and then shifts the whole value left by bitCount; with a suitable mask each lane is shifted within itself and no bit crosses into the neighboring lane.
/// \param bitCount number of bit positions to shift by
131+
/// \param protectiveMask should clear the bits that would cross the lane.
132+
/// The bits that must be cleared depend directly on the shift count, so it is natural for the caller
133+
/// to maintain the protective mask; otherwise, the mask would have to be computed on every invocation.
134+
/// We are not sure the optimizer would keep this mask around; if it were to recalculate it every time, it would be disastrous for performance.
135+
constexpr SWAR
136+
shiftIntraLaneLeft(int bitCount, SWAR protectiveMask) const noexcept {
137+
return SWAR{(*this & protectiveMask).value() << bitCount};
120138
}
121139

122-
constexpr SWAR blitElement(int index, SWAR other) const noexcept {
123-
constexpr auto OneElementMask = SWAR(~(~T(0) << NBits));
124-
auto IsolationMask = OneElementMask.shiftLanesLeft(index);
125-
return (*this & ~IsolationMask) | (other & IsolationMask);
140+
/// \brief Applies protectiveMask and then shifts the whole value right by bitCount; with a suitable mask no bit crosses into the neighboring lane.
/// \param bitCount number of bit positions to shift by
/// \param protectiveMask should clear the bits that would cross the lane
141+
/// \sa shiftIntraLaneLeft
142+
constexpr SWAR
143+
shiftIntraLaneRight(int bitCount, SWAR protectiveMask) const noexcept {
144+
return SWAR{(*this & protectiveMask).value() >> bitCount};
126145
}
127146

128147
T m_v;
@@ -299,7 +318,7 @@ constexpr auto broadcast(SWAR<NBits, T> v) {
299318
/// BooleanSWAR treats the MSB of each SWAR lane as the boolean associated with that lane.
300319
template<int NBits, typename T>
301320
struct BooleanSWAR: SWAR<NBits, T> {
302-
// Booleanness is stored in MSB of a given swar.
321+
// Booleanness is stored in the MSBs
303322
static constexpr auto MaskLaneMSB =
304323
broadcast<NBits, T>(SWAR<NBits, T>(T(1) << (NBits -1)));
305324
constexpr explicit BooleanSWAR(T v): SWAR<NBits, T>(v) {}

0 commit comments

Comments
 (0)