Add f128 division

tgross35 · tgross35 · commit a3c1c5744a1a · 2024-08-19T16:06:45.000-05:00
Use the new generic division algorithm to expose `__divtf3` and
`__divkf3`.
diff --git a/README.md b/README.md
@@ -256,7 +256,7 @@ of being added to Rust.
 
 - [x] addtf3.c
 - [x] comparetf2.c
-- [ ] divtf3.c
+- [x] divtf3.c
 - [x] extenddftf2.c
 - [x] extendhfsf2.c
 - [x] extendhftf2.c
diff --git a/build.rs b/build.rs
@@ -526,7 +526,6 @@ mod c {
                 ("__floatsitf", "floatsitf.c"),
                 ("__floatunditf", "floatunditf.c"),
                 ("__floatunsitf", "floatunsitf.c"),
-                ("__divtf3", "divtf3.c"),
                 ("__powitf2", "powitf2.c"),
                 ("__fe_getround", "fp_mode.c"),
                 ("__fe_raise_inexact", "fp_mode.c"),
diff --git a/examples/intrinsics.rs b/examples/intrinsics.rs
@@ -256,6 +256,10 @@ mod intrinsics {
         a * b
     }
 
+    pub fn divtf(a: f128, b: f128) -> f128 {
+        a / b
+    }
+
     pub fn subtf(a: f128, b: f128) -> f128 {
         a - b
     }
@@ -440,6 +444,7 @@ fn run() {
     bb(aeabi_uldivmod(bb(2), bb(3)));
     bb(ashlti3(bb(2), bb(2)));
     bb(ashrti3(bb(2), bb(2)));
+    bb(divtf(bb(2.), bb(2.)));
     bb(divti3(bb(2), bb(2)));
     bb(eqtf(bb(2.), bb(2.)));
     bb(extendhfdf(bb(2.)));
diff --git a/src/float/div.rs b/src/float/div.rs
@@ -2,15 +2,15 @@
 //!
 //! This module documentation gives an overview of the method used. More documentation is inline.
 //!
-//! Relevant notation:
+//! # Relevant notation
 //!
 //! - `m_a`: the mantissa of `a`, in base 2
 //! - `p_a`: the exponent of `a`, in base 2. I.e. `a = m_a * 2^p_a`
 //! - `uqN` (e.g. `uq1`): this refers to Q notation for fixed-point numbers. UQ1.31 is an unsigned
 //!   fixed-point number with 1 integral bit, and 31 decimal bits. A `uqN` variable of type `uM`
 //!   will have N bits of integer and M-N bits of fraction.
 //! - `hw`: half width, i.e. for `f64` this will be a `u32`.
-//! - `x` is the best estimate of `1/b`
+//! - `x` is the best estimate of `1/m_b`
 //!
 //! # Method Overview
 //!
@@ -22,10 +22,10 @@
 //! - Check for early exits (infinity, zero, etc).
 //! - If `a` or `b` are subnormal, normalize by shifting the mantissa and adjusting the exponent.
 //! - Set the implicit bit so math is correct.
-//! - Shift the significand (with implicit bit) fully left such that fixed point UQ1 or UQ0
-//!   numbers can be used for mantissa math. These will have greater precision than the actual
-//!   mantissa, which is important for correct rounding.
-//! - Calculate the reciprocal of `b`, `x`.
+//! - Shift mantissa significant digits (with implicit bit) fully left such that fixed-point UQ1
+//!   or UQ0 numbers can be used for mantissa math. These will have greater precision than the
+//!   actual mantissa, which is important for correct rounding.
+//! - Calculate the reciprocal of `m_b`, `x`.
 //! - Use the reciprocal to multiply rather than divide: `res = m_a * x_b * 2^{p_a - p_b}`.
 //! - Reapply rounding.
 //!
@@ -37,7 +37,7 @@
 //!
 //! In general, Newton's method takes the following form:
 //!
-//! ```
+//! ```text
 //! `x_n` is a guess or the result of a previous iteration. Increasing `n` converges to the
 //! desired result.
 //!
@@ -46,7 +46,7 @@
 //! x_{n+1} = x_n - f(x_n) / f'(x_n)
 //! ```
 //!
-//! Applying this to finding the reciprocal:
+//! Applying this to find the reciprocal:
 //!
 //! ```text
 //! 1 / x = b
@@ -59,23 +59,23 @@
 //! x_{n+1} = 2*x_n - b*x_n^2
 //! ```
 //!
-//! This is a process that can be repeated a known number of times to calculate the reciprocal with
-//! enough precision to achieve a correctly rounded result for the overall division operation.
+//! This is a process that can be repeated to calculate the reciprocal with enough precision to
+//! achieve a correctly rounded result for the overall division operation. The maximum required
+//! number of iterations is known since precision doubles with each iteration.
 //!
 //! # Half-width operations
 //!
 //! Calculating the reciprocal requires widening multiplication and performing arithmetic on the
 //! results, meaning that emulated integer arithmetic on `u128` (for `f64`) and `u256` (for `f128`)
 //! gets used instead of native math.
 //!
-//! To make this more efficient, all but the final operation can be computed with half-width
+//! To make this more efficient, all but the final operation can be computed using half-width
 //! integers. For example, rather than computing four iterations using 128-bit integers for `f64`,
 //! we can instead perform three iterations using native 64-bit integers and only one final
 //! iteration using the full 128 bits.
 //!
-//! This works because precision doubles with each round, so only one round is needed to extend
-//! precision from half bits to near the full bumber of bits (some leeway is allowed here because
-//! our fixed point number has more bits than the final mantissa will).
+//! This works because of precision doubling. Some leeway is allowed here because the fixed-point
+//! number has more bits than the final mantissa will.
 //!
 //! [Newton-Raphson method]: https://en.wikipedia.org/wiki/Newton%27s_method
 
@@ -504,33 +504,38 @@ where
     F::from_repr(abs_result | quotient_sign)
 }
 
-/// Calculate the number of iterations required to get needed precision of a float type.
+/// Calculate the number of iterations required for a float type's precision.
+///
+/// This returns `(h, f)` where `h` is the number of iterations to be done using integers at half
+/// the float's bit width, and `f` is the number of iterations done using integers of the float's
+/// full width. This is further explained in the module documentation.
 ///
-/// This returns `(h, f)` where `h` is the number of iterations to be donei using integers
-/// at half the float's width, and `f` is the number of iterations done using integers of the
-/// float's full width. Doing some iterations at half width is an optimization when the float
-/// is larger than a word.
+/// # Requirements
 ///
-/// ASSUMPTION: the initial estimate should have at least 8 bits of precision. If this is not
-/// true, results will be inaccurate.
+/// The initial estimate should have at least 8 bits of precision. If this is not true, results
+/// will be inaccurate.
 const fn get_iterations<F: Float>() -> (usize, usize) {
-    // Precision doubles with each iteration
+    // Precision doubles with each iteration. Assume we start with 8 bits of precision.
     let total_iterations = F::BITS.ilog2() as usize - 2;
 
     if 2 * size_of::<F>() <= size_of::<*const ()>() {
         // If widening multiplication will be efficient (uses word-sized integers), there is no
         // reason to use half-sized iterations.
         (0, total_iterations)
     } else {
+        // Otherwise, do as many iterations as possible at half width.
         (total_iterations - 1, 1)
     }
 }
 
-/// u_n for different precisions (with N-1 half-width iterations):
+/// `u_n` for different precisions (with N-1 half-width iterations).
+///
 /// W0 is the precision of C
 ///   u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW
 ///
 /// Estimated with bc:
+///
+/// ```text
 ///   define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; }
 ///   define half2(un) { return 2.0 * un / 2.0^hw + 2.0; }
 ///   define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; }
@@ -543,8 +548,9 @@ const fn get_iterations<F: Float>() -> (usize, usize) {
 /// u_3         | < 7.31      |              | < 7.31       | < 27054456580
 /// u_4         |             |              |              | < 80.4
 /// Final (U_N) | same as u_3 | < 72         | < 218        | < 13920
+/// ````
 ///
-/// Add 2 to U_N due to final decrement.
+/// Add 2 to `U_N` due to final decrement.
 const fn reciprocal_precision<F: Float>() -> u16 {
     let (half_iterations, full_iterations) = get_iterations::<F>();
 
@@ -612,6 +618,13 @@ intrinsics! {
         div(a, b)
     }
 
+    #[avr_skip]
+    #[ppc_alias = __divkf3]
+    #[cfg(not(feature = "no-f16-f128"))]
+    pub extern "C" fn __divtf3(a: f128, b: f128) -> f128 {
+        div(a, b)
+    }
+
     #[cfg(target_arch = "arm")]
     pub extern "C" fn __divsf3vfp(a: f32, b: f32) -> f32 {
         a / b
diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs
@@ -1,3 +1,4 @@
+#![feature(f128)]
 #![allow(unused_macros)]
 
 use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4};
@@ -152,4 +153,19 @@ mod float_div {
         f32, __divsf3vfp, Single, all();
         f64, __divdf3vfp, Double, all();
     }
+
+    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
+    float! {
+        f128, __divtf3, Quad,
+        // FIXME(llvm): there is a bug in LLVM rt.
+        // See <https://github.com/llvm/llvm-project/issues/91840>.
+        not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
+    }
+
+    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
+    float! {
+        f128, __divkf3, Quad, not(feature = "no-sys-f128");
+    }
 }