55#include " zoo/swar/SWAR.h"
66#include < assert.h>
77#include < cstdint>
8+ #include < utility>
89
910// #define ZOO_DEVELOPMENT_DEBUGGING
1011#ifdef ZOO_DEVELOPMENT_DEBUGGING
@@ -450,6 +451,7 @@ constexpr auto multiplication_OverflowUnsafe(
450451 );
451452}
452453
454+
453455template <int NB, typename T>
454456struct SWAR_Pair {
455457 SWAR<NB, T> even, odd;
@@ -479,6 +481,7 @@ constexpr auto halvePrecision(SWAR<NB, T> even, SWAR<NB, T> odd) {
479481 auto
480482 evenHalf = RV{even.value ()} & HalvingMask,
481483 oddHalf = RV{(RV{odd.value ()} & HalvingMask).value () << NB/2 };
484+
482485 return evenHalf | oddHalf;
483486}
484487
@@ -502,19 +505,46 @@ doublePrecisionMultiplication(SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier)
502505}
503506
504507template <int NB, typename T>
505- constexpr MultiplicationResult<NB, T>
506- wideningMultiplication (SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier) {
507- using S = SWAR<NB, T>; using D = SWAR<NB * 2 , T>;
508+ constexpr auto deinterleaveLanesOfPair = []( auto even, auto odd) {
509+ using S = SWAR<NB, T>;
510+ using H = SWAR<NB / 2 , T>;
508511 constexpr auto
509- HalfLane = S::NBits,
510- UpperHalfOfLanes = SWAR<S::NBits, T>::oddLaneMask ().value ();
511- auto [even, odd] = doublePrecisionMultiplication (multiplicand, multiplier);
512- auto
513- upper_even = even.shiftIntraLaneRight (HalfLane, D{UpperHalfOfLanes}),
514- upper_odd = odd.shiftIntraLaneRight (HalfLane, D{UpperHalfOfLanes});
515- auto
512+ HalfLane = H::NBits,
513+ UpperHalfOfLanes = H::oddLaneMask ().value ();
514+ auto
515+ upper_even = even.shiftIntraLaneRight (HalfLane, S{UpperHalfOfLanes}),
516+ upper_odd = odd.shiftIntraLaneRight (HalfLane, S{UpperHalfOfLanes});
517+ auto
516518 lower = halvePrecision (even, odd), // throws away the upper bits
517519 upper = halvePrecision (upper_even, upper_odd); // preserve the upper bits
520+ return std::make_pair (lower, upper);
521+ };
522+
523+ namespace test_deinterleaving {
524+
525+ template <int NB, typename T>
526+ auto test = [](auto a, auto b, auto expected_lower, auto expected_upper) {
527+ auto [lower, upper] = deinterleaveLanesOfPair<NB, T>(a, b);
528+ auto lower_ok = lower.value () == expected_lower.value ();
529+ auto upper_ok = upper.value () == expected_upper.value ();
530+ return lower_ok && upper_ok;
531+ };
532+
533+ using S = SWAR<8 , uint32_t >;
534+ static_assert (test<8 , uint32_t >(
535+ S{0xFDFCFBFA }, // input a
536+ S{0xF4F3F2F1 }, // input b
537+ S{0x4D3C2B1A }, // expected lower
538+ S{0xFFFFFFFF } // expected upper
539+ ));
540+
541+ } // namespace test_deinterleaving
542+
543+ template <int NB, typename T>
544+ constexpr MultiplicationResult<NB, T>
545+ wideningMultiplication (SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier) {
546+ auto [even, odd] = doublePrecisionMultiplication (multiplicand, multiplier);
547+ auto [lower, upper] = deinterleaveLanesOfPair<NB * 2 , T>(even, odd);
518548 return {lower, upper};
519549}
520550
0 commit comments