@@ -508,13 +508,14 @@ wideningMultiplication(SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier) {
508508 constexpr auto
509509 HalfLane = S::NBits,
510510 UpperHalfOfLanes = SWAR<S::NBits, T>::oddLaneMask ().value ();
511- auto [lower, upper] = doublePrecisionMultiplication (multiplicand, multiplier);
512- auto result = halvePrecision (lower, upper);
511+ auto [even, odd] = doublePrecisionMultiplication (multiplicand, multiplier);
513512 auto
514- over_even = D{(lower.value () & UpperHalfOfLanes) >> HalfLane},
515- over_odd = D{(upper.value () & UpperHalfOfLanes) >> HalfLane};
516- auto upper_lanes_overflow = halvePrecision (over_even, over_odd);
517- return {result, upper_lanes_overflow};
513+ upper_even = even.shiftIntraLaneRight (HalfLane, D{UpperHalfOfLanes}),
514+ upper_odd = odd.shiftIntraLaneRight (HalfLane, D{UpperHalfOfLanes});
515+ auto
516+ lower = halvePrecision (even, odd), // throws away the upper bits
517+ upper = halvePrecision (upper_even, upper_odd); // preserves the upper bits
518+ return {lower, upper};
518519}
519520
520521template <int NB, typename T>
@@ -525,8 +526,7 @@ auto saturatingMultiplication(SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier)
525526 auto [result, overflow] = wideningMultiplication (multiplicand, multiplier);
526527 auto did_overflow = zoo::swar::greaterEqual (overflow, One);
527528 auto lane_mask = did_overflow.MSBtoLaneMask ();
528- auto saturated = result | lane_mask;
529- return S{saturated};
529+ return S{result | lane_mask};
530530}
531531
532532template <int NB, typename T, typename MultiplicationFn>
0 commit comments