Skip to content

Commit 0fca8d3

Browse files
Implement GreaterLess with MSB on for SWAR. (#78)
* first pass of the art of computer programming msb on less than * fix some indents, fix some T{constant} initializers, rename a bad name function name * explicit types are almost never what we want in this code.
1 parent 1ddb71b commit 0fca8d3

File tree

2 files changed

+149
-34
lines changed

2 files changed

+149
-34
lines changed

inc/zoo/swar/SWAR.h

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ constexpr uint64_t popcount(uint64_t a) noexcept {
2727
>::execute(a);
2828
}
2929

30-
3130
/// Index into the bits of the type T that contains the MSB.
3231
template<typename T>
3332
constexpr std::make_unsigned_t<T> msbIndex(T v) noexcept {
@@ -189,7 +188,7 @@ constexpr auto isolateLSB(T v) {
189188

190189
template<int NBits, typename T>
191190
constexpr auto leastNBitsMask() {
192-
return (T(1ull)<<NBits)-1;
191+
return (T{1}<<NBits)-1;
193192
}
194193

195194
template<int NBits, uint64_t T>
@@ -199,7 +198,7 @@ constexpr auto leastNBitsMask() {
199198

200199
template<int NBits, typename T = uint64_t>
201200
constexpr T mostNBitsMask() {
202-
return ~leastNBitsMask<sizeof(T)*8-NBits, T>();
201+
return ~leastNBitsMask<sizeof(T)*8-NBits, T>();
203202
}
204203

205204

@@ -297,6 +296,10 @@ struct BooleanSWAR: SWAR<NBits, T> {
297296
friend constexpr BooleanSWAR<NB, TT>
298297
greaterEqual_MSB_off(SWAR<NB, TT>, SWAR<NB, TT>) noexcept;
299298

299+
// Declaration of indexOfMostSignficantLaneSet placed inside BooleanSWAR.
// NOTE(review): unlike the neighbouring greaterEqual_MSB_off and
// convertToBooleanSWAR declarations, this one has no `friend` keyword, and its
// signature uses the enclosing NBits/T instead of the NB/TT template
// parameters it introduces. Confirm whether a friend declaration of the
// namespace-scope function template was intended.
template<int NB, typename TT>
300+
constexpr T
301+
indexOfMostSignficantLaneSet(SWAR<NBits, T> test) noexcept;
302+
300303
template<int NB, typename TT>
301304
friend constexpr BooleanSWAR<NB, TT>
302305
convertToBooleanSWAR(SWAR<NB, TT> arg) noexcept;
@@ -362,6 +365,39 @@ constantIsGreaterEqual_MSB_off(SWAR<NBits, T> subtrahend) noexcept {
362365
}
363366
}
364367

368+
/// Bitwise ternary median (majority) of three words: a result bit is set
/// exactly when at least two of the corresponding bits of x, y and z are set.
/// The arguments may have different types; the result is converted to T, the
/// type of the first argument.
template<typename T, typename U, typename V>
369+
constexpr T median(T x, U y, V z) {
370+
return (x | y) & (y | z) & (x | z);
371+
}
372+
373+
/// Lane-wise unsigned comparison left >= right that also accepts lanes whose
/// most significant bit is set (contrast with greaterEqual_MSB_off).
/// Returns a BooleanSWAR built from the inverted "less than" test word.
template<int NBits, typename T>
374+
constexpr BooleanSWAR<NBits, T>
375+
greaterEqual(SWAR<NBits, T> left, SWAR<NBits, T> right) noexcept {
376+
// Adapted from TAOCP V4 P152
377+
// h is the MSB selector, x is left, y is right. Sets MSB to 1 in lanes
378+
// in test variable t for when xi < yi for lane i. Invert for greaterEqual.
379+
// t = h & ~<x ~y z>, where <a b c> denotes the bitwise median of a, b and c
380+
// z = (x|h) - (y&~h)
381+
using S = swar::SWAR<NBits, T>;
382+
const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x = left, y = right; the test computed below is x < y
383+
// Presumably h has exactly the MSB of every lane set; under that assumption
// each lane of (x|h) is at least as large as the same lane of (y&~h), so the
// subtraction never borrows across a lane boundary.
const auto z = (x|h) - (y&~h);
384+
// bitwise ternary median!
385+
const auto t = h & ~median(x, ~y, z);
386+
return ~BooleanSWAR<NBits, T>{static_cast<T>(t)}; // ~(x<y) === x >= y
387+
}
388+
389+
// In the condition where only MSBs will be on, we can fast lookup with 1 multiply the index of the most signficant byte that is on.
390+
// This appears to be a mapping from the (say) 256 unique values of a 64 bit int where only MSBs of each 8 bits can be on, but I don't fully understand it.
391+
// Adapted from TAOCP Vol 4A Page 153 Eq 94.
392+
template<int NBits, typename T>
393+
constexpr T
394+
indexOfMostSignficantLaneSet(SWAR<NBits, T> test) noexcept {
395+
const auto TypeWidth = sizeof(T) * 8;
396+
const auto TopVal = (T{1}<<(TypeWidth-NBits))-1, BottomVal = (T{1}<<(NBits-1))-1;
397+
const T MappingConstant = TopVal / BottomVal;
398+
return (test.value() * MappingConstant) >> (TypeWidth - NBits);
399+
}
400+
365401
template<int NBits, typename T>
366402
constexpr BooleanSWAR<NBits, T>
367403
greaterEqual_MSB_off(SWAR<NBits, T> left, SWAR<NBits, T> right) noexcept {

test/swar/BasicOperations.cpp

Lines changed: 110 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,19 @@
22

33
#include "catch2/catch.hpp"
44

5+
#include <ios>
6+
#include <iomanip>
7+
#include <iostream>
58
#include <type_traits>
69

710

811
using namespace zoo;
912
using namespace zoo::swar;
1013

14+
using S2_64 = SWAR<2, uint64_t>;
15+
using S2_32 = SWAR<2, uint32_t>;
16+
using S2_16 = SWAR<2, uint16_t>;
17+
1118
using S4_64 = SWAR<4, uint64_t>;
1219
using S4_32 = SWAR<4, uint32_t>;
1320
using S4_16 = SWAR<4, uint16_t>;
@@ -260,44 +267,116 @@ static_assert(8 == lsbIndex(1<<8));
260267
static_assert(17 == lsbIndex(1<<17));
261268
static_assert(30 == lsbIndex(1<<30));
262269

263-
/*
264-
These tests were not catching errors known to have been present
270+
271+
/*These tests were not catching errors known to have been present
265272
static_assert(0x80880008 == greaterEqual<3>(SWAR<4, uint32_t>(0x3245'1027)).value());
266273
static_assert(0x88888888 == greaterEqual<0>(SWAR<4, uint32_t>(0x0123'4567)).value());
267274
static_assert(0x88888888 == greaterEqual<0>(SWAR<4, uint32_t>(0x7654'3210)).value());
268275
static_assert(0x00000008 == greaterEqual<7>(SWAR<4, uint32_t>(0x0123'4567)).value());
269276
static_assert(0x80000000 == greaterEqual<7>(SWAR<4, uint32_t>(0x7654'3210)).value());
270277
*/
271278

272-
// Unusual formatting for easy visual verification.
273-
#define GE_MSB_TEST(left, right, result) static_assert(result== greaterEqual_MSB_off<4, u32>(SWAR<4, u32>(left), SWAR<4, u32>(right)).value());
274-
275-
GE_MSB_TEST(0x1000'0010,
276-
0x0111'1101,
277-
0x8000'0080)
278-
GE_MSB_TEST(0x4333'3343,
279-
0x4444'4444,
280-
0x8000'0080)
281-
GE_MSB_TEST(0x0550'0110,
282-
0x0110'0550,
283-
0x8888'8008)
284-
GE_MSB_TEST(0x4771'1414,
285-
0x4641'1774,
286-
0x8888'8008)
287-
288-
GE_MSB_TEST(0x0123'4567,
289-
0x0000'0000,
290-
0x8888'8888)
291-
GE_MSB_TEST(0x0123'4567,
292-
0x7777'7777,
293-
0x0000'0008)
294-
295-
GE_MSB_TEST(0x0000'0000,
296-
0x0123'4567,
297-
0x8000'0000)
298-
GE_MSB_TEST(0x7777'7777,
299-
0x0123'4567,
300-
0x8888'8888)
279+
280+
#define GE_MSB_TEST(left, right, result) static_assert(result == greaterEqual_MSB_off<4, u32>(SWAR<4, u32>(left), SWAR<4, u32>(right)).value());
281+
282+
GE_MSB_TEST(
283+
0x1000'0010,
284+
0x0111'1101,
285+
0x8000'0080)
286+
GE_MSB_TEST(
287+
0x4333'3343,
288+
0x4444'4444,
289+
0x8000'0080)
290+
GE_MSB_TEST(
291+
0x0550'0110,
292+
0x0110'0550,
293+
0x8888'8008)
294+
GE_MSB_TEST(
295+
0x4771'1414,
296+
0x4641'1774,
297+
0x8888'8008)
298+
GE_MSB_TEST(
299+
0x0123'4567,
300+
0x0000'0000,
301+
0x8888'8888)
302+
GE_MSB_TEST(
303+
0x0123'4567,
304+
0x7777'7777,
305+
0x0000'0008)
306+
GE_MSB_TEST(
307+
0x0000'0000,
308+
0x0123'4567,
309+
0x8000'0000)
310+
GE_MSB_TEST(
311+
0x7777'7777,
312+
0x0123'4567,
313+
0x8888'8888)
314+
315+
// Replicate the msb off tests with the greaterEqual that allows msb on
316+
#define GE_MSB_ON_TEST(left, right, result) static_assert(result == greaterEqual<4, u32>(SWAR<4, u32>(left), SWAR<4, u32>(right)).value());
317+
318+
GE_MSB_ON_TEST(
319+
0x1000'0010,
320+
0x0111'1101,
321+
0x8000'0080)
322+
GE_MSB_ON_TEST(
323+
0x4333'3343,
324+
0x4444'4444,
325+
0x8000'0080)
326+
GE_MSB_ON_TEST(
327+
0x0550'0110,
328+
0x0110'0550,
329+
0x8888'8008)
330+
GE_MSB_ON_TEST(
331+
0x4771'1414,
332+
0x4641'1774,
333+
0x8888'8008)
334+
GE_MSB_ON_TEST(
335+
0x0123'4567,
336+
0x0000'0000,
337+
0x8888'8888)
338+
GE_MSB_ON_TEST(
339+
0x0123'4567,
340+
0x7777'7777,
341+
0x0000'0008)
342+
GE_MSB_ON_TEST(
343+
0x0000'0000,
344+
0x0123'4567,
345+
0x8000'0000)
346+
GE_MSB_ON_TEST(
347+
0x7777'7777,
348+
0x0123'4567,
349+
0x8888'8888)
350+
351+
// Runtime checks for greaterEqual on inputs that have lane MSBs set.
// In every check the expected word has the lane MSB set in exactly those
// lanes where the first argument is >= the second.
// NOTE(review): the first two sections are both named "single"; consider
// giving them distinct names so test reports can tell them apart.
TEST_CASE(
352+
"greaterEqualMSBOn",
353+
"[swar][unsigned-swar]"
354+
) {
355+
// 2-bit lanes in 16 bits: lane 1 holds i on the left and i-1 on the right.
SECTION("single") {
356+
for (uint32_t i = 1; i < 4; i++) {
357+
const auto left = S2_16{0}.blitElement(1, i);
358+
// Presumably AllOnes sets every bit, so every lane other than lane 1 is
// at its maximum on the right and zero on the left.
const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1);
359+
// Expected: only lane 1 reports true (2 == binary 10, the MSB of a 2-bit lane).
const auto test = S2_16{0}.blitElement(1, 2);
360+
CHECK(test.value() == greaterEqual<2, u16>(left, right).value());
361+
}
362+
}
363+
// Same idea with 4-bit lanes in 32 bits: lane 1 holds i+1 versus i-1.
SECTION("single") {
364+
for (uint32_t i = 1; i < 15; i++) {
365+
const auto large = S4_32{0}.blitElement(1, i+1);
366+
const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1);
367+
// Expected: only lane 1 reports true (8 == binary 1000, the MSB of a 4-bit lane).
const auto test = S4_32{0}.blitElement(1, 8);
368+
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
369+
}
370+
}
371+
// Every lane holds the same value: i+1 on one side, i-1 on the other.
SECTION("allLanes") {
372+
for (uint32_t i = 1; i < 15; i++) {
373+
const auto small = S4_32(S4_32::LeastSignificantBit * (i-1));
374+
const auto large = S4_32(S4_32::LeastSignificantBit * (i+1));
375+
// Expected: every lane reports true, i.e. 8 replicated into each lane.
const auto test = S4_32(S4_32::LeastSignificantBit * 8);
376+
CHECK(test.value() == greaterEqual<4, u32>(large, small).value());
377+
}
378+
}
379+
}
301380

302381
static_assert(0x123 == SWAR<4, uint32_t>(0x173).blitElement(1, 2).value());
303382
static_assert(0 == isolateLSB(u32(0)));

0 commit comments

Comments
 (0)