From 4419ff2158456c8d8c482c9337d7650ab6c0b767 Mon Sep 17 00:00:00 2001
From: quaternic <57393910+quaternic@users.noreply.github.com>
Date: Thu, 31 Jul 2025 19:54:16 +0300
Subject: [PATCH 1/7] define and implement trait NarrowingDiv for unsigned
 integer division

---
 libm/src/math/support/big/tests.rs            |  26 +++
 libm/src/math/support/int_traits.rs           |   3 +
 .../math/support/int_traits/narrowing_div.rs  | 162 ++++++++++++++++++
 libm/src/math/support/mod.rs                  |   2 +-
 4 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 libm/src/math/support/int_traits/narrowing_div.rs
diff --git a/libm/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs
index d54706c72..0c32f445c 100644
--- a/libm/src/math/support/big/tests.rs
+++ b/libm/src/math/support/big/tests.rs
@@ -3,6 +3,7 @@ use std::string::String;
 use std::{eprintln, format};
 
 use super::{HInt, MinInt, i256, u256};
+use crate::support::{Int as _, NarrowingDiv};
 
 const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff;
 
@@ -336,3 +337,28 @@ fn i256_shifts() {
         x = y;
     }
 }
+#[test]
+fn div_u256_by_u128() {
+    for j in i8::MIN..=i8::MAX {
+        let y: u128 = (j as i128).rotate_right(4).unsigned();
+        if y == 0 {
+            continue;
+        }
+        for i in i8::MIN..=i8::MAX {
+            let x: u128 = (i as i128).rotate_right(4).unsigned();
+            let xy = x.widen_mul(y);
+            assert_eq!(xy.checked_narrowing_div_rem(y), Some((x, 0)));
+            if y != 1 {
+                assert_eq!((xy + u256::ONE).checked_narrowing_div_rem(y), Some((x, 1)));
+            }
+            if x != 0 {
+                assert_eq!(
+                    (xy - u256::ONE).checked_narrowing_div_rem(y),
+                    Some((x - 1, y - 1))
+                );
+            }
+            let r = ((y as f64) * 0.12345) as u128;
+            assert_eq!((xy + r.widen()).checked_narrowing_div_rem(y), Some((x, r)));
+        }
+    }
+}
diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs
index 9d8826dfe..f1aa1e5b9 100644
--- a/libm/src/math/support/int_traits.rs
+++ b/libm/src/math/support/int_traits.rs
@@ -1,5 +1,8 @@
 use core::{cmp, fmt, ops};
 
+mod narrowing_div;
+pub use narrowing_div::NarrowingDiv;
+
 /// Minimal integer implementations needed on all integer types, including wide integers.
 #[allow(dead_code)] // Some constants are only used with tests
 pub trait MinInt:
diff --git a/libm/src/math/support/int_traits/narrowing_div.rs b/libm/src/math/support/int_traits/narrowing_div.rs
new file mode 100644
index 000000000..8d0158426
--- /dev/null
+++ b/libm/src/math/support/int_traits/narrowing_div.rs
@@ -0,0 +1,162 @@
+use crate::support::{DInt, HInt, Int, MinInt, u256};
+
+/// Trait for unsigned division of a double-wide integer
+/// when the quotient doesn't overflow.
+///
+/// This is the inverse of widening multiplication:
+///  - for any `x` and nonzero `y`: `x.widen_mul(y).checked_narrowing_div_rem(y) == Some((x, 0))`,
+///  - and for any `r in 0..y`: `x.carrying_mul(y, r).checked_narrowing_div_rem(y) == Some((x, r))`,
+pub trait NarrowingDiv: DInt + MinInt<Unsigned = Self> {
+    /// Computes `(self / n, self % n))`
+    ///
+    /// # Safety
+    /// The caller must ensure that `self.hi() < n`, or equivalently,
+    /// that the quotient does not overflow.
+    unsafe fn unchecked_narrowing_div_rem(self, n: Self::H) -> (Self::H, Self::H);
+
+    /// Returns `Some((self / n, self % n))` when `self.hi() < n`.
+    fn checked_narrowing_div_rem(self, n: Self::H) -> Option<(Self::H, Self::H)> {
+        if self.hi() < n {
+            Some(unsafe { self.unchecked_narrowing_div_rem(n) })
+        } else {
+            None
+        }
+    }
+}
+
+macro_rules! impl_narrowing_div_primitive {
+    ($D:ident) => {
+        impl NarrowingDiv for $D {
+            unsafe fn unchecked_narrowing_div_rem(self, n: Self::H) -> (Self::H, Self::H) {
+                if self.hi() >= n {
+                    unsafe { core::hint::unreachable_unchecked() }
+                }
+                ((self / n as $D) as Self::H, (self % n as $D) as Self::H)
+            }
+        }
+    };
+}
+
+// Extend division from `u2N / uN` to `u4N / u2N`
+// This is not the most efficient algorithm, but it is
+// relatively simple.
+macro_rules! impl_narrowing_div_recurse {
+    ($D:ident) => {
+        impl NarrowingDiv for $D {
+            unsafe fn unchecked_narrowing_div_rem(self, n: Self::H) -> (Self::H, Self::H) {
+                if self.hi() >= n {
+                    unsafe { core::hint::unreachable_unchecked() }
+                }
+
+                // Normalize the divisor by shifting the most significant one
+                // to the leading position. `n != 0` is implied by `self.hi() < n`
+                let lz = n.leading_zeros();
+                let a = self << lz;
+                let b = n << lz;
+
+                let ah = a.hi();
+                let (a0, a1) = a.lo().lo_hi();
+                // SAFETY: For both calls, `b.leading_zeros() == 0` by the above shift.
+                // SAFETY: `ah < b` follows from `self.hi() < n`
+                let (q1, r) = unsafe { div_three_digits_by_two(a1, ah, b) };
+                // SAFETY: `r < b` is given as the postcondition of the previous call
+                let (q0, r) = unsafe { div_three_digits_by_two(a0, r, b) };
+
+                // Undo the earlier normalization for the remainder
+                (Self::H::from_lo_hi(q0, q1), r >> lz)
+            }
+        }
+    };
+}
+
+impl_narrowing_div_primitive!(u16);
+impl_narrowing_div_primitive!(u32);
+impl_narrowing_div_primitive!(u64);
+impl_narrowing_div_primitive!(u128);
+impl_narrowing_div_recurse!(u256);
+
+/// Implement `u3N / u2N`-division on top of `u2N / uN`-division.
+///
+/// Returns the quotient and remainder of `(a * R + a0) / n`,
+/// where `R = (1 << U::BITS)` is the digit size.
+///
+/// # Safety
+/// Requires that `n.leading_zeros() == 0` and `a < n`.
+unsafe fn div_three_digits_by_two<U>(a0: U, a: U::D, n: U::D) -> (U, U::D)
+where
+    U: HInt,
+    U::D: Int + NarrowingDiv,
+{
+    if n.leading_zeros() > 0 || a >= n {
+        debug_assert!(false, "unsafe preconditions not met");
+        unsafe { core::hint::unreachable_unchecked() }
+    }
+
+    // n = n1R + n0
+    let (n0, n1) = n.lo_hi();
+    // a = a2R + a1
+    let (a1, a2) = a.lo_hi();
+
+    let mut q;
+    let mut r;
+    let mut wrap;
+    // `a < n` is guaranteed by the caller, but `a2 == n1 && a1 < n0` is possible
+    if let Some((q0, r1)) = a.checked_narrowing_div_rem(n1) {
+        q = q0;
+        // a = qn1 + r1, where 0 <= r1 < n1
+
+        // Include the remainder with the low bits:
+        // r = a0 + r1R
+        r = U::D::from_lo_hi(a0, r1);
+
+        // Subtract the contribution of the divisor low bits with the estimated quotient
+        let d = q.widen_mul(n0);
+        (r, wrap) = r.overflowing_sub(d);
+
+        // Since `q` is the quotient of dividing with a slightly smaller divisor,
+        // it may be an overapproximation, but is never too small, and similarly,
+        // `r` is now either the correct remainder ...
+        if !wrap {
+            return (q, r);
+        }
+        // ... or the remainder went "negative" (by as much as `d = qn0 < RR`)
+        // and we have to adjust.
+        q -= U::ONE;
+    } else {
+        debug_assert!(a2 == n1 && a1 < n0);
+        // Otherwise, `a2 == n1`, and the estimated quotient would be
+        // `R + (a1 % n1)`, but the correct quotient can't overflow.
+        // We'll start from `q = R = (1 << U::BITS)`,
+        // so `r = aR + a0 - qn = (a - n)R + a0`
+        r = U::D::from_lo_hi(a0, a1.wrapping_sub(n0));
+        // Since `a < n`, the first decrement is always needed:
+        q = U::MAX; /* R - 1 */
+    }
+
+    (r, wrap) = r.overflowing_add(n);
+    if wrap {
+        return (q, r);
+    }
+
+    // If the remainder still didn't wrap, we need another step.
+    q -= U::ONE;
+    (r, wrap) = r.overflowing_add(n);
+    // Since `n >= RR/2`, at least one of the two `r += n` must have wrapped.
+    debug_assert!(wrap, "estimated quotient should be off by at most two");
+    (q, r)
+}
+
+#[cfg(test)]
+mod test {
+    use super::{HInt, NarrowingDiv};
+
+    #[test]
+    fn inverse_mul() {
+        for x in 0..=u8::MAX {
+            for y in 1..=u8::MAX {
+                let xy = x.widen_mul(y);
+                assert_eq!(xy.checked_narrowing_div_rem(y), Some((x, 0)));
+            }
+        }
+    }
+}
diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs
index b2d7bd8d5..98e3950c8 100644
--- a/libm/src/math/support/mod.rs
+++ b/libm/src/math/support/mod.rs
@@ -28,7 +28,7 @@ pub use hex_float::hf16;
 pub use hex_float::hf128;
 #[allow(unused_imports)]
 pub use hex_float::{hf32, hf64};
-pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
+pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt, NarrowingDiv};
 
 /// Hint to the compiler that the current path is cold.
 pub fn cold_path() {

From 08716e30a564ea2493d6a9063d455436ef6dabc2 Mon Sep 17 00:00:00 2001
From: quaternic <57393910+quaternic@users.noreply.github.com>
Date: Tue, 12 Aug 2025 17:49:36 +0300
Subject: [PATCH 2/7] allow dead code

---
 libm/src/math/support/int_traits/narrowing_div.rs | 1 +
 libm/src/math/support/mod.rs                      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/libm/src/math/support/int_traits/narrowing_div.rs b/libm/src/math/support/int_traits/narrowing_div.rs
index 8d0158426..bcc258ef4 100644
--- a/libm/src/math/support/int_traits/narrowing_div.rs
+++ b/libm/src/math/support/int_traits/narrowing_div.rs
@@ -6,6 +6,7 @@ use crate::support::{DInt, HInt, Int, MinInt, u256};
 /// This is the inverse of widening multiplication:
 ///  - for any `x` and nonzero `y`: `x.widen_mul(y).checked_narrowing_div_rem(y) == Some((x, 0))`,
 ///  - and for any `r in 0..y`: `x.carrying_mul(y, r).checked_narrowing_div_rem(y) == Some((x, r))`,
+#[allow(dead_code)]
 pub trait NarrowingDiv: DInt + MinInt<Unsigned = Self> {
     /// Computes `(self / n, self % n))`
     ///
diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs
index 98e3950c8..7b529eb76 100644
--- a/libm/src/math/support/mod.rs
+++ b/libm/src/math/support/mod.rs
@@ -28,6 +28,7 @@ pub use hex_float::hf16;
 pub use hex_float::hf128;
 #[allow(unused_imports)]
 pub use hex_float::{hf32, hf64};
+#[allow(unused_imports)]
 pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt, NarrowingDiv};
 
 /// Hint to the compiler that the current path is cold.

From 12b4e650d4b7b28f1237749e469d6fa9ffa2836c Mon Sep 17 00:00:00 2001
From: quaternic <57393910+quaternic@users.noreply.github.com>
Date: Sun, 24 Aug 2025 13:53:38 +0300
Subject: [PATCH 3/7] add cases to NarrowingDiv unit test

---
 libm/src/math/support/int_traits/narrowing_div.rs | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/libm/src/math/support/int_traits/narrowing_div.rs b/libm/src/math/support/int_traits/narrowing_div.rs
index bcc258ef4..e87041e3e 100644
--- a/libm/src/math/support/int_traits/narrowing_div.rs
+++ b/libm/src/math/support/int_traits/narrowing_div.rs
@@ -157,6 +157,17 @@ mod test {
             for y in 1..=u8::MAX {
                 let xy = x.widen_mul(y);
                 assert_eq!(xy.checked_narrowing_div_rem(y), Some((x, 0)));
+                assert_eq!(
+                    (xy + (y - 1) as u16).checked_narrowing_div_rem(y),
+                    Some((x, y - 1))
+                );
+                if y > 1 {
+                    assert_eq!((xy + 1).checked_narrowing_div_rem(y), Some((x, 1)));
+                    assert_eq!(
+                        (xy + (y - 2) as u16).checked_narrowing_div_rem(y),
+                        Some((x, y - 2))
+                    );
+                }
             }
         }
     }

From 901d6603d8d455015c13dfba1fd1da42b1df06f6 Mon Sep 17 00:00:00 2001
From: quaternic <57393910+quaternic@users.noreply.github.com>
Date: Sun, 24 Aug 2025 13:58:24 +0300
Subject: [PATCH 4/7] remove a redundant debug_assert

---
 libm/src/math/support/int_traits/narrowing_div.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libm/src/math/support/int_traits/narrowing_div.rs b/libm/src/math/support/int_traits/narrowing_div.rs
index e87041e3e..ab9a3ef78 100644
--- a/libm/src/math/support/int_traits/narrowing_div.rs
+++ b/libm/src/math/support/int_traits/narrowing_div.rs
@@ -89,7 +89,6 @@ where
     U::D: Int + NarrowingDiv,
 {
     if n.leading_zeros() > 0 || a >= n {
-        debug_assert!(false, "unsafe preconditions not met");
         unsafe { core::hint::unreachable_unchecked() }
     }
 

From 1e5e6697f6b6c21e782029ed4af075aa1c6e2ed2 Mon Sep 17 00:00:00 2001
From: quaternic <57393910+quaternic@users.noreply.github.com>
Date: Tue, 26 Aug 2025 12:12:12 +0300
Subject: [PATCH 5/7] clarify impl_narrowing_div_primitive

---
 libm/src/math/support/int_traits/narrowing_div.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libm/src/math/support/int_traits/narrowing_div.rs b/libm/src/math/support/int_traits/narrowing_div.rs
index ab9a3ef78..29dc889f3 100644
--- a/libm/src/math/support/int_traits/narrowing_div.rs
+++ b/libm/src/math/support/int_traits/narrowing_div.rs
@@ -1,4 +1,4 @@
-use crate::support::{DInt, HInt, Int, MinInt, u256};
+use crate::support::{CastInto, DInt, HInt, Int, MinInt, u256};
 
 /// Trait for unsigned division of a double-wide integer
 /// when the quotient doesn't overflow.
@@ -25,6 +25,8 @@ pub trait NarrowingDiv: DInt + MinInt<Unsigned = Self> {
     }
 }
 
+// For primitive types we can just use the standard
+// division operators in the double-wide type.
 macro_rules! impl_narrowing_div_primitive {
     ($D:ident) => {
         impl NarrowingDiv for $D {
@@ -32,7 +34,7 @@ macro_rules! impl_narrowing_div_primitive {
                 if self.hi() >= n {
                     unsafe { core::hint::unreachable_unchecked() }
                 }
-                ((self / n as $D) as Self::H, (self % n as $D) as Self::H)
+                ((self / n.widen()).cast(), (self % n.widen()).cast())
             }
         }
     };

From 7f3801e88b1661621fb85ef559f80b2c11f87c00 Mon Sep 17 00:00:00 2001
From: quaternic <57393910+quaternic@users.noreply.github.com>
Date: Tue, 26 Aug 2025 12:14:04 +0300
Subject: [PATCH 6/7] add license information

---
 libm/src/math/support/int_traits/narrowing_div.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libm/src/math/support/int_traits/narrowing_div.rs b/libm/src/math/support/int_traits/narrowing_div.rs
index 29dc889f3..3da0843cc 100644
--- a/libm/src/math/support/int_traits/narrowing_div.rs
+++ b/libm/src/math/support/int_traits/narrowing_div.rs
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
 use crate::support::{CastInto, DInt, HInt, Int, MinInt, u256};
 
 /// Trait for unsigned division of a double-wide integer

From d71f7632b8c77bb3d30822cf0aa9f391bf6b4386 Mon Sep 17 00:00:00 2001
From: quaternic <57393910+quaternic@users.noreply.github.com>
Date: Tue, 26 Aug 2025 12:29:33 +0300
Subject: [PATCH 7/7] add icount benchmark for u256 NarrowingDiv

---
 libm-test/benches/icount.rs | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/libm-test/benches/icount.rs b/libm-test/benches/icount.rs
index 02ee13f80..0b8577122 100644
--- a/libm-test/benches/icount.rs
+++ b/libm-test/benches/icount.rs
@@ -111,6 +111,17 @@ fn icount_bench_u128_widen_mul(cases: Vec<(u128, u128)>) {
     }
 }
 
+#[library_benchmark]
+#[bench::linspace(setup_u128_mul())]
+fn icount_bench_u256_narrowing_div(cases: Vec<(u128, u128)>) {
+    use libm::support::NarrowingDiv;
+    for (x, y) in cases.iter().copied() {
+        let x = black_box(x.widen_hi());
+        let y = black_box(y);
+        black_box(x.checked_narrowing_div_rem(y));
+    }
+}
+
 #[library_benchmark]
 #[bench::linspace(setup_u256_add())]
 fn icount_bench_u256_add(cases: Vec<(u256, u256)>) {
@@ -145,7 +156,7 @@ fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
 
 library_benchmark_group!(
     name = icount_bench_u128_group;
-    benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_add, icount_bench_u256_sub, icount_bench_u256_shl, icount_bench_u256_shr
+    benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_narrowing_div, icount_bench_u256_add, icount_bench_u256_sub, icount_bench_u256_shl, icount_bench_u256_shr
 );
 
 #[library_benchmark]