Skip to content

Commit 87ddc04

Browse files
author
Eddie
committed
Compress/expand
1 parent 1fe091b commit 87ddc04

File tree

4 files changed

+484
-267
lines changed

4 files changed

+484
-267
lines changed

.gitignore

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
# Vscode does not like to build outside of the source tree
2-
# (multiple glitches)
3-
4-
.vscode
5-
test/.vscode
1+
# Vscode does not like to build outside of the source tree
2+
# (multiple glitches)
3+
4+
.vscode
5+
test/.vscode
6+
build

inc/zoo/swar/SWAR.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ namespace zoo { namespace swar {
1515
using u64 = uint64_t;
1616
using u32 = uint32_t;
1717
using u16 = uint16_t;
18-
using u8 = uint8_t;
18+
using u8 = std::uint8_t;
1919

2020
template<int LogNBits>
2121
constexpr uint64_t popcount(uint64_t a) noexcept {
@@ -58,7 +58,10 @@ struct SWAR {
5858
SignificantBitsCount = BitWidth - PaddingBitsCount,
5959
AllOnes = ~std::make_unsigned_t<T>{0} >> PaddingBitsCount,
6060
LeastSignificantBit = meta::BitmaskMaker<T, std::make_unsigned_t<T>{1}, NBits>::value,
61-
MostSignificantBit = LeastSignificantBit << (NBits - 1);
61+
MostSignificantBit = LeastSignificantBit << (NBits - 1),
62+
// Use LowerBits instead of ~MostSignificantBit so as not to pollute
63+
// "don't care" bits when non-power-of-two bit lane sizes are supported
64+
LowerBits = MostSignificantBit - LeastSignificantBit;
6265

6366
SWAR() = default;
6467
constexpr explicit SWAR(T v): m_v(v) {}
@@ -134,14 +137,16 @@ struct SWAR {
134137
/// We are not sure the optimizer would keep this mask available somewhere; if it were to recalculate it, that would be disastrous for performance.
135138
constexpr SWAR
136139
shiftIntraLaneLeft(int bitCount, SWAR protectiveMask) const noexcept {
137-
return SWAR{(*this & protectiveMask).value() << bitCount};
140+
T shiftC = static_cast<T>(bitCount); // could be a narrowing conversion
141+
auto V = (*this & protectiveMask).value();
142+
return SWAR{static_cast<T>(V << shiftC)};
138143
}
139144

140145
/// \param protectiveMask should clear the bits that would cross the lane
141146
/// \sa shiftIntraLaneLeft
142147
constexpr SWAR
143148
shiftIntraLaneRight(int bitCount, SWAR protectiveMask) const noexcept {
144-
return SWAR{(*this & protectiveMask).value() >> bitCount};
149+
return SWAR{static_cast<T>((*this & protectiveMask).value() >> static_cast<T>(bitCount))}; // static_cast (as in shiftIntraLaneLeft): T{bitCount} and SWAR{promoted int} are narrowing conversions inside braces
145150
}
146151

147152
T m_v;

0 commit comments

Comments
 (0)