Factors SWARWithSubLanes to its header (#77)

thecppzoo · web-flow · commit 169f4f364317 · 2024-03-06T18:11:53.000-04:00
* Factors SWARWithSublanes to its header * Change not intended for Windows * Omission * Omission #2 * Misspelling * Another misspelling --------- Co-authored-by: Eddie <eddie see email elsewhere>
diff --git a/inc/zoo/map/RobinHoodUtil.h b/inc/zoo/map/RobinHoodUtil.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "zoo/swar/SWAR.h"
+#include "zoo/swar/SWARWithSubLanes.h"
 
 #include <array>
 #include <cstddef>
diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h
@@ -160,109 +160,6 @@ struct SWAR {
     T m_v;
 };
 
-// SWAR is a useful abstraction for performing computations in lanes overlaid
-// over any given integral type.
-// Doing additions, subtractions, and compares via SWAR techniques requires an
-// extra bit per lane be available past the lane size, _or_ knowledge that both
-// of your MSBs are set 0 (leaving space for the operation).  Similarly, doing
-// multiplications via SWAR techniques require double bits per lane (unless you
-// can bind your inputs at half lane size).
-// This leads to a useful technique (which we use in the robin hood table)
-// where we interleave two related small bit count integers inside of a lane of
-// swar.  More generally, this is useful because it sometimes allows fast
-// operations on side "a" of some lane if side "b" is blitted out, and vice
-// versa.  In the spirit of separation of concerns, we provide a cut-lane-SWAR
-// abstraction here.
-
-template<int NBitsLeast_, int NBitsMost_, typename T = uint64_t>
-struct SWARWithSubLanes: SWAR<NBitsLeast_ + NBitsMost_ , T> {
-    static constexpr inline auto NBitsLeast = NBitsLeast_;
-    static constexpr inline auto NBitsMost = NBitsMost_;
-
-    using Base = SWAR<NBitsMost + NBitsLeast, T>;
-    static constexpr inline auto Available = sizeof(T);
-    static constexpr inline auto LaneBits = NBitsLeast + NBitsMost;
-
-    using Base::Base;
-    constexpr SWARWithSubLanes(Base b) noexcept: Base(b) {}
-    constexpr SWARWithSubLanes(T most, T least) noexcept:
-        Base((most << NBitsLeast) | least)
-    {}
-
-    // M is most significant bits slice, L is least significant bits slice.
-    // 0x....M2L2M1L1 or MN|LN||...||M2|L2||M1|L1
-    using SL = SWARWithSubLanes<NBitsLeast, NBitsMost, T>;
-
-    static constexpr inline auto LeastOnes =
-        Base(meta::BitmaskMaker<T, Base{1}.value(), LaneBits>::value);
-    static constexpr inline auto MostOnes =
-        Base(LeastOnes.value() << NBitsLeast);
-    static constexpr inline auto LeastMask = MostOnes - LeastOnes;
-    static constexpr inline auto MostMask = ~LeastMask;
-
-    constexpr auto least() const noexcept {
-        return SL{LeastMask & *this};
-    }
-
-    // Isolate the least significant bits of the lane at the specified position.
-    constexpr auto least(int pos) const noexcept {
-        constexpr auto Filter = SL((T(1) << NBitsLeast) - 1);
-        return Filter.shiftLanesLeft(pos) & *this;
-    }
-
-    // Returns only the least significant bits at specified position, 'decoded' to their integer value.
-    constexpr auto leastFlat(int pos) const noexcept {
-        return least().at(pos);
-    }
-
-    constexpr auto most() const noexcept {
-        return SL{MostMask & *this};
-    }
-
-    // The most significant bits of the lane at the specified position.
-    constexpr auto most(int pos) const noexcept {
-        constexpr auto Filter =
-            SL(((T(1) << SL::NBitsMost) - 1) << SL::NBitsLeast);
-        return Filter.shiftLanesLeft(pos) & *this;
-    }
-
-    // The most significant bits of the lane at the specified position,
-    // 'decoded' to their integer value.
-    constexpr auto mostFlat(int pos) const noexcept {
-        return most().at(pos) >> SL::NBitsLeast;
-    }
-
-    // Blits most sig bits into least significant bits. Experimental.
-    constexpr auto flattenMostToLeast(int pos) const noexcept {
-        return SL(this->m_v >> NBitsLeast) & LeastMask;
-    }
-
-    // Blits least sig bits into most significant bits. Experimental.
-    constexpr auto promoteLeastToMost(int pos) const noexcept {
-        return SL(this->m_v << NBitsMost) & MostMask;
-    }
-
-    // Sets the lsb sublane at |pos| with least significant NBitsLeast of |in|
-    constexpr auto least(T in, int pos) const noexcept {
-        constexpr auto filter = (T(1) << LaneBits) - 1;
-        const auto keep = ~(filter << (LaneBits * pos)) | MostMask.value();
-        const auto rdyToInsert = this->m_v & keep;
-        const auto rval = rdyToInsert | ((in & LeastMask.value()) << (LaneBits * pos));
-        return SL(rval);
-    }
-
-    // Sets the msb sublane at |pos| with least significant NBitsMost of |in|
-    constexpr auto most(T in, int pos) const noexcept {
-        constexpr auto filter = (T(1) << LaneBits) - 1;
-        const auto keep = ~(filter << (LaneBits * pos)) | LeastMask.value();
-        const auto rdyToInsert = this->m_v & keep;
-        const auto insVal = (((in<<NBitsLeast) & MostMask.value()) << (LaneBits * pos));
-        const auto rval = rdyToInsert | insVal;
-        return SL(rval);
-    }
-};
-
-
 /// Defining operator== on base SWAR types is entirely too error prone. Force a verbose invocation.
 template<int NBits, typename T = uint64_t>
 constexpr auto horizontalEquality(SWAR<NBits, T> left, SWAR<NBits, T> right) {
diff --git a/inc/zoo/swar/SWARWithSubLanes.h b/inc/zoo/swar/SWARWithSubLanes.h
@@ -0,0 +1,121 @@
+#ifndef ZOO_SWAR_SWARWITHSUBLANES_H
+#define ZOO_SWAR_SWARWITHSUBLANES_H
+
+#include "zoo/swar/SWAR.h"
+
+namespace zoo { namespace swar {
+
+/// \brief Allows SWAR lanes to be treated either as a whole or as something
+/// with internal structure.
+
+/// Example: Robin Hood "Haystack" metadata composed of hoisted hash bits and
+/// PSL (probe sequence lengths), that are used together or separately.
+/// SWAR is a useful abstraction for performing computations in lanes overlaid
+/// over any given integral type.
+/// To keep the normal integer operations in a lane from disrupting the
+/// adjoining lanes, some precautions must be maintained.  For example,
+/// upon an addition of lanes, we either need that the domain of our values
+/// does not use the most significant bit (guaranteeing normal addition of
+/// lanes won't cross to the upper lane) or that this possibility is explicitly
+/// taken into account (see "full addition").  This applies to all operations,
+/// including comparisons.
+/// Similarly, doing multiplications via SWAR techniques requires double the
+/// bits per lane (unless you can guarantee the values of the input lanes fit
+/// in half the lane size).
+/// This leads to a useful technique (which we use in the Robin Hood table)
+/// where we interleave two related small bit count integers inside of a lane of
+/// SWAR.  More generally, this is useful because it sometimes allows fast
+/// operations on side "a" of some lane if side "b" is blitted out, and vice
+/// versa.  In the spirit of separation of concerns, we provide a cut-lane-SWAR
+/// abstraction here.
+template<int NBitsLeast_, int NBitsMost_, typename T = uint64_t>
+struct SWARWithSubLanes: SWAR<NBitsLeast_ + NBitsMost_ , T> {
+    static constexpr inline auto NBitsLeast = NBitsLeast_;
+    static constexpr inline auto NBitsMost = NBitsMost_;
+
+    using Base = SWAR<NBitsMost + NBitsLeast, T>;
+    static constexpr inline auto Available = sizeof(T);
+    static constexpr inline auto LaneBits = NBitsLeast + NBitsMost;
+
+    using Base::Base;
+    constexpr SWARWithSubLanes(Base b) noexcept: Base(b) {}
+    constexpr SWARWithSubLanes(T most, T least) noexcept:
+        Base((most << NBitsLeast) | least)
+    {}
+
+    // M is most significant bits slice, L is least significant bits slice.
+    // 0x....M2L2M1L1 or MN|LN||...||M2|L2||M1|L1
+    using SL = SWARWithSubLanes<NBitsLeast, NBitsMost, T>;
+
+    static constexpr inline auto LeastOnes =
+        Base(meta::BitmaskMaker<T, Base{1}.value(), LaneBits>::value);
+    static constexpr inline auto MostOnes =
+        Base(LeastOnes.value() << NBitsLeast);
+    static constexpr inline auto LeastMask = MostOnes - LeastOnes;
+    static constexpr inline auto MostMask = ~LeastMask;
+
+    constexpr auto least() const noexcept {
+        return SL{LeastMask & *this};
+    }
+
+    // Isolate the least significant bits of the lane at the specified position.
+    constexpr auto least(int pos) const noexcept {
+        constexpr auto Filter = SL((T(1) << NBitsLeast) - 1);
+        return Filter.shiftLanesLeft(pos) & *this;
+    }
+
+    // Returns only the least significant bits at specified position, 'decoded' to their integer value.
+    constexpr auto leastFlat(int pos) const noexcept {
+        return least().at(pos);
+    }
+
+    constexpr auto most() const noexcept {
+        return SL{MostMask & *this};
+    }
+
+    // The most significant bits of the lane at the specified position.
+    constexpr auto most(int pos) const noexcept {
+        constexpr auto Filter =
+            SL(((T(1) << SL::NBitsMost) - 1) << SL::NBitsLeast);
+        return Filter.shiftLanesLeft(pos) & *this;
+    }
+
+    // The most significant bits of the lane at the specified position,
+    // 'decoded' to their integer value.
+    constexpr auto mostFlat(int pos) const noexcept {
+        return most().at(pos) >> SL::NBitsLeast;
+    }
+
+    // Blits most sig bits into least significant bits. Experimental.
+    constexpr auto flattenMostToLeast(int pos) const noexcept {
+        return SL(this->m_v >> NBitsLeast) & LeastMask;
+    }
+
+    // Blits least sig bits into most significant bits. Experimental.
+    constexpr auto promoteLeastToMost(int pos) const noexcept {
+        return SL(this->m_v << NBitsMost) & MostMask;
+    }
+
+    // Sets the lsb sublane at |pos| with least significant NBitsLeast of |in|
+    constexpr auto least(T in, int pos) const noexcept {
+        constexpr auto filter = (T(1) << LaneBits) - 1;
+        const auto keep = ~(filter << (LaneBits * pos)) | MostMask.value();
+        const auto rdyToInsert = this->m_v & keep;
+        const auto rval = rdyToInsert | ((in & LeastMask.value()) << (LaneBits * pos));
+        return SL(rval);
+    }
+
+    // Sets the msb sublane at |pos| with least significant NBitsMost of |in|
+    constexpr auto most(T in, int pos) const noexcept {
+        constexpr auto filter = (T(1) << LaneBits) - 1;
+        const auto keep = ~(filter << (LaneBits * pos)) | LeastMask.value();
+        const auto rdyToInsert = this->m_v & keep;
+        const auto insVal = (((in<<NBitsLeast) & MostMask.value()) << (LaneBits * pos));
+        const auto rval = rdyToInsert | insVal;
+        return SL(rval);
+    }
+};
+
+}}
+
+#endif
diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h
@@ -35,7 +35,7 @@ std::ostream &operator<<(std::ostream &out, zoo::swar::SWAR<NB, B> s) {
 
 #else
 
-#define ZOO_TRACEABLE_EXPRESSION(...) __VA_ARGS__
+#define ZOO_TRACEABLE_EXPRESSION(...) (void)(__VA_ARGS__)
 
 #endif
 
@@ -130,7 +130,7 @@ Desired result:
 1111 0110 0100 0100 0100 1000 1001 1010 forParallelSuffix
 
                            10 1101 1101
-/*
+
 Complete example (32 bits)
 Selection mask:
 0001 0011 0111 0111 0110 1110 1100 1010
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -111,7 +111,7 @@ else()
     )
     set(
         SWAR_SOURCES
-        swar/BasicOperations.cpp
+        swar/BasicOperations.cpp swar/sublanes.cpp
     )
     set(
         MAP_SOURCES
diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp
@@ -275,58 +275,6 @@ GE_MSB_TEST(0x7777'7777,
             0x0123'4567,
             0x8888'8888)
 
-// 3 bits on msb side, 5 bits on lsb side.
-using Lanes = SWARWithSubLanes<5, 3, u32>;
-using S8u32 = SWAR<8, u32>;
-static constexpr inline u32 all0 = 0;
-static constexpr inline u32 allF = broadcast<8>(S8u32(0x0000'00FFul)).value();
-
-static_assert(allF == Lanes(allF).value());
-static_assert(0xFFFF'FFFF == Lanes(allF).value());
-
-static_assert(0xFFFF'FFE0 == Lanes(allF).least(0,0).value());
-static_assert(0xFFFF'FFE1 == Lanes(allF).least(1,0).value());
-static_assert(0xFFFF'E0FF == Lanes(allF).least(0,1).value());
-static_assert(0xFFFF'E1FF == Lanes(allF).least(1,1).value());
-
-static_assert(0xFFE0'FFFF == Lanes(allF).least(0,2).value());
-static_assert(0xFFE1'FFFF == Lanes(allF).least(1,2).value());
-static_assert(0xE0FF'FFFF == Lanes(allF).least(0,3).value());
-static_assert(0xE1FF'FFFF == Lanes(allF).least(1,3).value());
-
-static_assert(0xFFFF'FF1F == Lanes(allF).most(0,0).value());
-static_assert(0xFFFF'FF3F == Lanes(allF).most(1,0).value());
-static_assert(0xFFFF'1FFF == Lanes(allF).most(0,1).value());
-static_assert(0xFFFF'3FFF == Lanes(allF).most(1,1).value());
-
-static_assert(0xFF1F'FFFF == Lanes(allF).most(0,2).value());
-static_assert(0xFF3F'FFFF == Lanes(allF).most(1,2).value());
-static_assert(0x1FFF'FFFF == Lanes(allF).most(0,3).value());
-static_assert(0x3FFF'FFFF == Lanes(allF).most(1,3).value());
-
-static_assert(0x0000'001f == Lanes(all0).least(31, 0).most(0, 0).value());
-static_assert(0x0000'1f00 == Lanes(all0).least(31, 1).most(0, 1).value());
-static_assert(0x001f'0000 == Lanes(all0).least(31, 2).most(0, 2).value());
-static_assert(0x1f00'0000 == Lanes(all0).least(31, 3).most(0, 3).value());
-
-static_assert(0x0000'00e0 == Lanes(all0).least(0, 0).most(31, 0).value());
-static_assert(0x0000'e000 == Lanes(all0).least(0, 1).most(31, 1).value());
-static_assert(0x00e0'0000 == Lanes(all0).least(0, 2).most(31, 2).value());
-static_assert(0xe000'0000 == Lanes(all0).least(0, 3).most(31, 3).value());
-
-static_assert(0x1F1F'1F1F == Lanes(allF).least().value());
-static_assert(0xE0E0'E0E0 == Lanes(allF).most().value());
-
-static_assert(0x0000'001F == Lanes(allF).least(0).value());
-static_assert(0x0000'1F00 == Lanes(allF).least(1).value());
-static_assert(0x001F'0000 == Lanes(allF).least(2).value());
-static_assert(0x1F00'0000 == Lanes(allF).least(3).value());
-
-static_assert(0x0000'00E0 == Lanes(allF).most(0).value());
-static_assert(0x0000'E000 == Lanes(allF).most(1).value());
-static_assert(0x00E0'0000 == Lanes(allF).most(2).value());
-static_assert(0xE000'0000 == Lanes(allF).most(3).value());
-
 static_assert(0x123 == SWAR<4, uint32_t>(0x173).blitElement(1, 2).value());
 static_assert(0 == isolateLSB(u32(0)));
 
diff --git a/test/swar/sublanes.cpp b/test/swar/sublanes.cpp
@@ -0,0 +1,56 @@
+#include "zoo/swar/SWARWithSubLanes.h"
+
+using namespace zoo;
+using namespace zoo::swar;
+
+// 3 bits on msb side, 5 bits on lsb side.
+using Lanes = SWARWithSubLanes<5, 3, u32>;
+using S8u32 = SWAR<8, u32>;
+static constexpr inline u32 all0 = 0;
+static constexpr inline u32 allF = broadcast<8>(S8u32(0x0000'00FFul)).value();
+
+static_assert(allF == Lanes(allF).value());
+static_assert(0xFFFF'FFFF == Lanes(allF).value());
+
+static_assert(0xFFFF'FFE0 == Lanes(allF).least(0,0).value());
+static_assert(0xFFFF'FFE1 == Lanes(allF).least(1,0).value());
+static_assert(0xFFFF'E0FF == Lanes(allF).least(0,1).value());
+static_assert(0xFFFF'E1FF == Lanes(allF).least(1,1).value());
+
+static_assert(0xFFE0'FFFF == Lanes(allF).least(0,2).value());
+static_assert(0xFFE1'FFFF == Lanes(allF).least(1,2).value());
+static_assert(0xE0FF'FFFF == Lanes(allF).least(0,3).value());
+static_assert(0xE1FF'FFFF == Lanes(allF).least(1,3).value());
+
+static_assert(0xFFFF'FF1F == Lanes(allF).most(0,0).value());
+static_assert(0xFFFF'FF3F == Lanes(allF).most(1,0).value());
+static_assert(0xFFFF'1FFF == Lanes(allF).most(0,1).value());
+static_assert(0xFFFF'3FFF == Lanes(allF).most(1,1).value());
+
+static_assert(0xFF1F'FFFF == Lanes(allF).most(0,2).value());
+static_assert(0xFF3F'FFFF == Lanes(allF).most(1,2).value());
+static_assert(0x1FFF'FFFF == Lanes(allF).most(0,3).value());
+static_assert(0x3FFF'FFFF == Lanes(allF).most(1,3).value());
+
+static_assert(0x0000'001f == Lanes(all0).least(31, 0).most(0, 0).value());
+static_assert(0x0000'1f00 == Lanes(all0).least(31, 1).most(0, 1).value());
+static_assert(0x001f'0000 == Lanes(all0).least(31, 2).most(0, 2).value());
+static_assert(0x1f00'0000 == Lanes(all0).least(31, 3).most(0, 3).value());
+
+static_assert(0x0000'00e0 == Lanes(all0).least(0, 0).most(31, 0).value());
+static_assert(0x0000'e000 == Lanes(all0).least(0, 1).most(31, 1).value());
+static_assert(0x00e0'0000 == Lanes(all0).least(0, 2).most(31, 2).value());
+static_assert(0xe000'0000 == Lanes(all0).least(0, 3).most(31, 3).value());
+
+static_assert(0x1F1F'1F1F == Lanes(allF).least().value());
+static_assert(0xE0E0'E0E0 == Lanes(allF).most().value());
+
+static_assert(0x0000'001F == Lanes(allF).least(0).value());
+static_assert(0x0000'1F00 == Lanes(allF).least(1).value());
+static_assert(0x001F'0000 == Lanes(allF).least(2).value());
+static_assert(0x1F00'0000 == Lanes(allF).least(3).value());
+
+static_assert(0x0000'00E0 == Lanes(allF).most(0).value());
+static_assert(0x0000'E000 == Lanes(allF).most(1).value());
+static_assert(0x00E0'0000 == Lanes(allF).most(2).value());
+static_assert(0xE000'0000 == Lanes(allF).most(3).value());

Original file line number	Diff line number	Diff line change
`@@ -111,7 +111,7 @@ else()`
`111`	`111`	`)`
`112`	`112`	`set(`
`113`	`113`	`SWAR_SOURCES`
`114`		`- swar/BasicOperations.cpp`
	`114`	`+ swar/BasicOperations.cpp swar/sublanes.cpp`
`115`	`115`	`)`
`116`	`116`	`set(`
`117`	`117`	`MAP_SOURCES`