22//!
33//! This module documentation gives an overview of the method used. More documentation is inline.
44//!
5- //! Relevant notation:
5+ //! # Relevant notation
66//!
77//! - `m_a`: the mantissa of `a`, in base 2
88//! - `p_a`: the exponent of `a`, in base 2. I.e. `a = m_a * 2^p_a`
99//! - `uqN` (e.g. `uq1`): this refers to Q notation for fixed-point numbers. UQ1.31 is an unsigned
1010//! fixed-point number with 1 integral bit, and 31 decimal bits. A `uqN` variable of type `uM`
1111//! will have N bits of integer and M-N bits of fraction.
1212//! - `hw`: half width, i.e. for `f64` this will be a `u32`.
13- //! - `x` is the best estimate of `1/b `
13+ //! - `x` is the best estimate of `1/m_b `
1414//!
1515//! # Method Overview
1616//!
2222//! - Check for early exits (infinity, zero, etc).
2323//! - If `a` or `b` are subnormal, normalize by shifting the mantissa and adjusting the exponent.
2424//! - Set the implicit bit so math is correct.
25- //! - Shift the significand (with implicit bit) fully left such that fixed point UQ1 or UQ0
26- //! numbers can be used for mantissa math. These will have greater precision than the actual
27- //! mantissa, which is important for correct rounding.
28- //! - Calculate the reciprocal of `b `, `x`.
25+ //! - Shift mantissa significant digits (with implicit bit) fully left such that fixed- point UQ1
26+ //! or UQ0 numbers can be used for mantissa math. These will have greater precision than the
27+ //! actual mantissa, which is important for correct rounding.
28+ //! - Calculate the reciprocal of `m_b `, `x`.
2929//! - Use the reciprocal to multiply rather than divide: `res = m_a * x_b * 2^{p_a - p_b}`.
3030//! - Reapply rounding.
3131//!
3737//!
3838//! In general, Newton's method takes the following form:
3939//!
40- //! ```
40+ //! ```text
4141//! `x_n` is a guess or the result of a previous iteration. Increasing `n` converges to the
4242//! desired result.
4343//!
4646//! x_{n+1} = x_n - f(x_n) / f'(x_n)
4747//! ```
4848//!
49- //! Applying this to finding the reciprocal:
49+ //! Applying this to find the reciprocal:
5050//!
5151//! ```text
5252//! 1 / x = b
5959//! x_{n+1} = 2*x_n - b*x_n^2
6060//! ```
6161//!
62- //! This is a process that can be repeated a known number of times to calculate the reciprocal with
63- //! enough precision to achieve a correctly rounded result for the overall division operation.
62+ //! This is a process that can be repeated to calculate the reciprocal with enough precision to
63+ //! achieve a correctly rounded result for the overall division operation. The maximum required
64+ //! number of iterations is known since precision doubles with each iteration.
6465//!
6566//! # Half-width operations
6667//!
6768//! Calculating the reciprocal requires widening multiplication and performing arithmetic on the
6869//! results, meaning that emulated integer arithmetic on `u128` (for `f64`) and `u256` (for `f128`)
6970//! gets used instead of native math.
7071//!
71- //! To make this more efficient, all but the final operation can be computed with half-width
72+ //! To make this more efficient, all but the final operation can be computed using half-width
7273//! integers. For example, rather than computing four iterations using 128-bit integers for `f64`,
7374//! we can instead perform three iterations using native 64-bit integers and only one final
7475//! iteration using the full 128 bits.
7576//!
76- //! This works because precision doubles with each round, so only one round is needed to extend
77- //! precision from half bits to near the full bumber of bits (some leeway is allowed here because
78- //! our fixed point number has more bits than the final mantissa will).
77+ //! This works because of precision doubling. Some leeway is allowed here because the fixed-point
78+ //! number has more bits than the final mantissa will.
7979//!
8080//! [Newton-Raphson method]: https://en.wikipedia.org/wiki/Newton%27s_method
8181
@@ -504,33 +504,38 @@ where
504504 F :: from_repr ( abs_result | quotient_sign)
505505}
506506
507- /// Calculate the number of iterations required to get needed precision of a float type.
507+ /// Calculate the number of iterations required for a float type's precision.
508+ ///
509+ /// This returns `(h, f)` where `h` is the number of iterations to be done using integers at half
510+ /// the float's bit width, and `f` is the number of iterations done using integers of the float's
511+ /// full width. This is further explained in the module documentation.
508512///
509- /// This returns `(h, f)` where `h` is the number of iterations to be donei using integers
510- /// at half the float's width, and `f` is the number of iterations done using integers of the
511- /// float's full width. Doing some iterations at half width is an optimization when the float
512- /// is larger than a word.
513+ /// # Requirements
513514///
514- /// ASSUMPTION: the initial estimate should have at least 8 bits of precision. If this is not
515- /// true, results will be inaccurate.
515+ /// The initial estimate should have at least 8 bits of precision. If this is not true, results
516+ /// will be inaccurate.
516517const fn get_iterations < F : Float > ( ) -> ( usize , usize ) {
517- // Precision doubles with each iteration
518+ // Precision doubles with each iteration. Assume we start with 8 bits of precision.
518519 let total_iterations = F :: BITS . ilog2 ( ) as usize - 2 ;
519520
520521 if 2 * size_of :: < F > ( ) <= size_of :: < * const ( ) > ( ) {
521522 // If widening multiplication will be efficient (uses word-sized integers), there is no
522523 // reason to use half-sized iterations.
523524 ( 0 , total_iterations)
524525 } else {
526+ // Otherwise, do as many iterations as possible at half width.
525527 ( total_iterations - 1 , 1 )
526528 }
527529}
528530
529- /// u_n for different precisions (with N-1 half-width iterations):
531+ /// `u_n` for different precisions (with N-1 half-width iterations).
532+ ///
530533/// W0 is the precision of C
531534/// u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW
532535///
533536/// Estimated with bc:
537+ ///
538+ /// ```text
534539/// define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; }
535540/// define half2(un) { return 2.0 * un / 2.0^hw + 2.0; }
536541/// define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; }
@@ -543,8 +548,9 @@ const fn get_iterations<F: Float>() -> (usize, usize) {
543548/// u_3 | < 7.31 | | < 7.31 | < 27054456580
544549/// u_4 | | | | < 80.4
545550/// Final (U_N) | same as u_3 | < 72 | < 218 | < 13920
551+ /// ````
546552///
547- /// Add 2 to U_N due to final decrement.
553+ /// Add 2 to ` U_N` due to final decrement.
548554const fn reciprocal_precision < F : Float > ( ) -> u16 {
549555 let ( half_iterations, full_iterations) = get_iterations :: < F > ( ) ;
550556
@@ -612,6 +618,13 @@ intrinsics! {
612618 div( a, b)
613619 }
614620
621+ #[ avr_skip]
622+ #[ ppc_alias = __divkf3]
623+ #[ cfg( not( feature = "no-f16-f128" ) ) ]
624+ pub extern "C" fn __divtf3( a: f128, b: f128) -> f128 {
625+ div( a, b)
626+ }
627+
615628 #[ cfg( target_arch = "arm" ) ]
616629 pub extern "C" fn __divsf3vfp( a: f32 , b: f32 ) -> f32 {
617630 a / b
0 commit comments