diff --git a/.gitignore b/.gitignore index e9113fc9..ef06d38b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,10 @@ -# Vscode does not like to build outside of the source tree -# (multiple glitches) - -.vscode -test/.vscode -build -.cache + +.vscode +test/.vscode +build +.cache +.idea +**cmake-build** + +# Vscode does not like to build outside of the source tree +# (multiple glitches) \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..e69de29b diff --git a/inc/zoo/meta/BitmaskMaker.h b/inc/zoo/meta/BitmaskMaker.h index 4c8008ac..0c3f26a8 100644 --- a/inc/zoo/meta/BitmaskMaker.h +++ b/inc/zoo/meta/BitmaskMaker.h @@ -42,6 +42,7 @@ struct BitmaskMaker { static_assert(0xF0F0 == BitmaskMaker::value); static_assert(0xEDFEDFED == BitmaskMaker::value); +static_assert(0b0001'0001 == BitmaskMaker::value); }} // zoo::meta diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 508ba0ac..e3409098 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -5,6 +5,7 @@ #include "zoo/meta/log.h" #include +#include #ifdef _MSC_VER #include @@ -90,6 +91,21 @@ struct SWAR { constexpr T value() const noexcept { return m_v; } + template + constexpr static T baseFromLaneLiterals(const T(&args)[N]) { + static_assert(N == Lanes, "Wrong number of lanes"); + T result = 0; + for (auto arg: args) { + result = (result << NBits) | arg; + } + return result; + } + + template + constexpr static SWAR fromLaneLiterals(const T(&args)[N]) { + return SWAR{baseFromLaneLiterals(args)}; + } + #define SWAR_UNARY_OPERATORS_X_LIST \ X(SWAR, ~) //constexpr SWAR operator~() const noexcept { return SWAR{~m_v}; } diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index ec5eb6ac..5028a121 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -260,7 +260,7 @@ template constexpr auto makeLaneMaskFromMSB(SWAR input) { using S = SWAR; auto msb = input & S{S::MostSignificantBit}; - auto msbCopiedToLSB = S{msb.value() >> (NB - 1)}; + auto msbCopiedToLSB = S{static_cast(msb.value() >> (NB - 1))}; return impl::makeLaneMaskFromMSB_and_LSB(msb, msbCopiedToLSB); } @@ -392,8 +392,13 @@ template< typename CountHalver > constexpr auto associativeOperatorIterated_regressive( - Base base, Base neutral, IterationCount count, IterationCount forSquaring, - Operator op, unsigned log2Count, CountHalver ch + const Base base, + const Base neutral, + IterationCount count, + const IterationCount forSquaring, + const Operator op, + unsigned log2Count, + const CountHalver ch ) { auto result = neutral; if(!log2Count) { return result; } @@ -419,17 +424,54 @@ constexpr auto multiplication_OverflowUnsafe_SpecificBitCount( auto halver = [](auto counts) { auto msbCleared = counts & ~S{S::MostSignificantBit}; - return S{msbCleared.value() << 1}; + return S{static_cast(msbCleared.value() << 1)}; }; - multiplier = S{multiplier.value() << (NB - ActualBits)}; + multiplier = S{static_cast(multiplier.value() << (NB - ActualBits))}; return associativeOperatorIterated_regressive( - multiplicand, S{0}, multiplier, S{S::MostSignificantBit}, operation, - ActualBits, halver + multiplicand, + S{0}, + multiplier, + S{S::MostSignificantBit}, + operation, + ActualBits, + halver ); } -/// \note Not removed yet because it is an example of "progressive" associative exponentiation +template +constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( + SWAR x, + SWAR exponent +) { + using S = SWAR; + + auto operation = [](auto left, auto right, auto counts) { + const auto mask = makeLaneMaskFromMSB(counts); + const auto product = + multiplication_OverflowUnsafe_SpecificBitCount(left, right); + return (product & mask) | (left & ~mask); + }; + + // halver should work same as multiplication... i think... + auto halver = [](auto counts) { + auto msbCleared = counts & ~S{S::MostSignificantBit}; + return S{static_cast(msbCleared.value() << 1)}; + }; + + exponent = S{static_cast(exponent.value() << (NB - ActualBits))}; + return associativeOperatorIterated_regressive( + x, + S{meta::BitmaskMaker().value}, // neutral is lane wise.. + exponent, + S{S::MostSignificantBit}, + operation, + ActualBits, + halver + ); +} + +// \note Not removed yet because it is an example of "progressive" associative exponentiation template constexpr auto multiplication_OverflowUnsafe_SpecificBitCount_deprecated( SWAR multiplicand, @@ -462,6 +504,17 @@ constexpr auto multiplication_OverflowUnsafe( ); } +template +constexpr auto exponentiation_OverflowUnsafe( + SWAR base, + SWAR exponent +) { + return + exponentiation_OverflowUnsafe_SpecificBitCount( + base, exponent + ); +} + template struct SWAR_Pair{ SWAR even, odd; diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 1628e222..09a129d4 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -7,7 +7,6 @@ #include #include - using namespace zoo; using namespace zoo::swar; @@ -64,8 +63,31 @@ static_assert( multiplication_OverflowUnsafe_SpecificBitCount<3>(Micand, Mplier).value() ); +static_assert(0b00000010000000110000010100000110 == 0x02'03'05'06); + +TEST_CASE("Expontiation with 8-bit lane width (overflow unsafe)") { + using S = SWAR<8, u32>; + constexpr auto base = S::fromLaneLiterals({2, 3, 5, 6}); + constexpr auto exponent = S::fromLaneLiterals({7, 4, 2, 3}); + constexpr auto expected = S::fromLaneLiterals({128, 81, 25, 216}); + constexpr auto actual = exponentiation_OverflowUnsafe(base, exponent); + static_assert(expected.value() == actual.value()); + CHECK(expected.value() == actual.value()); +} + +TEST_CASE("Expontiation with 16-bit lane width (overflow unsafe)") { + using S = SWAR<16, u64>; // Change to 16-bit lane width + constexpr auto base = S::fromLaneLiterals({10, 2, 7, 3}); + constexpr auto exponent = S::fromLaneLiterals({3, 5, 1, 4}); + constexpr auto expected = S::fromLaneLiterals({1000, 32, 7, 81}); + constexpr auto actual = exponentiation_OverflowUnsafe(base, exponent); + static_assert(expected.value() == actual.value()); + CHECK(expected.value() == actual.value()); } +}; + + #define HE(nbits, t, v0, v1) \ static_assert(horizontalEquality(\ SWAR(v0),\ @@ -425,7 +447,7 @@ TEST_CASE( "BooleanSWAR MSBtoLaneMask", "[swar]" ) { - // BooleanSWAR as a mask: + // BooleanSWAR as a mask: auto bswar =BooleanSWAR<4, u32>(0x0808'0000); auto mask = S4_32(0x0F0F'0000); CHECK(bswar.MSBtoLaneMask().value() == mask.value()); @@ -452,6 +474,6 @@ TEST_CASE( CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value()); CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value()); CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value()); - CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); - CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); + CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); + CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); }