11//! Helper trait for generic float types.
22
3+ use core:: f64;
4+
35use crate :: fmt:: { Debug , LowerExp } ;
46use crate :: num:: FpCategory ;
5- use crate :: ops:: { Add , Div , Mul , Neg } ;
7+ use crate :: ops:: { self , Add , Div , Mul , Neg } ;
8+
9+ /// Lossy `as` casting between two types.
///
/// `Self` is converted into `T` by value; both the source and the target type must be `Copy`.
/// The implementations generated by the `int!` macro in this file perform a plain
/// `self as i16`.
10+ pub trait CastInto < T : Copy > : Copy {
11+ fn cast ( self ) -> T ; // Converts `self` into `T`; any loss of information is up to the implementation.
12+ }
13+
14+ /// Collection of traits that allow us to be generic over integer size.
///
/// The bounds below are the operations that `RawFloat::integer_decode` applies to the raw
/// bit pattern of a float: shifts by a `u32` amount, bitwise AND/OR, equality comparison
/// and a cast to `i16`.
15+ pub trait Integer :
16+ Sized
17+ + Clone
18+ + Copy
19+ + Debug
20+ + ops:: Shr < u32 , Output = Self > // `value >> n` with `n: u32`, yielding the same integer type
21+ + ops:: Shl < u32 , Output = Self > // `value << n` with `n: u32`, yielding the same integer type
22+ + ops:: BitAnd < Output = Self > // masking with `&`
23+ + ops:: BitOr < Output = Self > // combining with `|`
24+ + PartialEq
25+ + CastInto < i16 > // lossy conversion to `i16` via `CastInto::cast`
26+ {
27+ const ZERO : Self ; // the value `0` of the implementing type
28+ const ONE : Self ; // the value `1` of the implementing type
29+ }
630
7- /// A helper trait to avoid duplicating basically all the conversion code for `f32` and `f64`.
// Implements `CastInto<i16>` and `Integer` for every type in a comma-separated list.
// The list must contain at least one type (the repetitions use `+`).
31+ macro_rules! int {
32+ ( $( $ty: ty) ,+) => {
33+ $(
34+ impl CastInto <i16 > for $ty {
35+ fn cast( self ) -> i16 {
36+ self as i16 // plain `as` cast: wider integers are truncated to their low 16 bits
37+ }
38+ }
39+
40+ impl Integer for $ty {
41+ const ZERO : Self = 0 ;
42+ const ONE : Self = 1 ;
43+ }
44+ ) +
45+ }
46+ }
47+
48+ int ! ( u32 , u64 ) ; // the two types used as `RawFloat::Int` by the `f32` and `f64` impls in this file
49+
50+ /// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
851///
952/// See the parent module's doc comment for why this is necessary.
1053///
11- /// Should **never ever** be implemented for other types or be used outside the dec2flt module.
54+ /// Should **never ever** be implemented for other types or be used outside the `dec2flt` module.
1255#[ doc( hidden) ]
1356pub trait RawFloat :
1457 Sized
@@ -24,62 +67,93 @@ pub trait RawFloat:
2467 + Copy
2568 + Debug
2669{
70+ /// The unsigned integer with the same size as the float
71+ type Int : Integer + Into < u64 > ;
72+
73+ /* general constants */
74+
2775 const INFINITY : Self ;
2876 const NEG_INFINITY : Self ;
2977 const NAN : Self ;
3078 const NEG_NAN : Self ;
3179
80+ /// Bit width of the float
81+ const BITS : u32 ;
82+
83+ /// Mantissa digits including the hidden bit (provided by core)
84+ const MANTISSA_BITS : u32 ;
85+
86+ const EXPONENT_MASK : Self :: Int ;
87+ const MANTISSA_MASK : Self :: Int ;
88+
3289 /// The number of bits in the significand, *excluding* the hidden bit.
33- const MANTISSA_EXPLICIT_BITS : usize ;
34-
35- // Round-to-even only happens for negative values of q
36- // when q ≥ −4 in the 64-bit case and when q ≥ −17 in
37- // the 32-bitcase.
38- //
39- // When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
40- // have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
41- // 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
42- //
43- // When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
44- // so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
45- // or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
46- // (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
47- // or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
48- //
49- // Thus we have that we only need to round ties to even when
50- // we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
51- // (in the 32-bit case). In both cases,the power of five(5^|q|)
52- // fits in a 64-bit word.
90+ const MANTISSA_EXPLICIT_BITS : u32 = Self :: MANTISSA_BITS - 1 ;
91+
92+ /// Bits for the exponent
93+ const EXPONENT_BITS : u32 = Self :: BITS - Self :: MANTISSA_EXPLICIT_BITS - 1 ;
94+
95+ /// Minimum exponent value `-(1 << (EXPONENT_BITS - 1)) + 1`.
96+ const MINIMUM_EXPONENT : i32 = -( 1 << ( Self :: EXPONENT_BITS - 1 ) ) + 1 ;
97+
98+ /// Maximum value of the biased exponent field, `(1 << EXPONENT_BITS) - 1` (same value as `INFINITE_POWER`).
99+ const MAXIMUM_EXPONENT : u32 = ( 1 << Self :: EXPONENT_BITS ) - 1 ;
100+
101+ /// The exponent bias value
102+ const EXPONENT_BIAS : u32 = Self :: MAXIMUM_EXPONENT >> 1 ;
103+
104+ /// Largest exponent value `(1 << EXPONENT_BITS) - 1`.
105+ const INFINITE_POWER : i32 = ( 1 << Self :: EXPONENT_BITS ) - 1 ;
106+
107+ /// Round-to-even only happens for negative values of q
108+ /// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
109+ /// the 32-bit case.
110+ ///
111+ /// When q ≥ 0, we have that 5^q ≤ 2m+1. In the 64-bit case, we
112+ /// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case, we have
113+ /// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
114+ ///
115+ /// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
116+ /// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
117+ /// or 2m+1 > 2^24 (32-bit case). Hence, we must have 2^53×5^−q < 2^64
118+ /// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
119+ /// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
120+ ///
121+ /// Thus we have that we only need to round ties to even when
122+ /// we have that q ∈ [−4, 23] (in the 64-bit case) or q ∈ [−17, 10]
123+ /// (in the 32-bit case). In both cases, the power of five (5^|q|)
124+ /// fits in a 64-bit word.
53125 const MIN_EXPONENT_ROUND_TO_EVEN : i32 ;
54126 const MAX_EXPONENT_ROUND_TO_EVEN : i32 ;
55127
56- // Minimum exponent that for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
57- const MIN_EXPONENT_FAST_PATH : i64 ;
58-
59- // Maximum exponent that for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
60- const MAX_EXPONENT_FAST_PATH : i64 ;
128+ /* limits related to Fast pathing */
61129
62- // Maximum exponent that can be represented for a disguised-fast path case.
63- // This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
64- const MAX_EXPONENT_DISGUISED_FAST_PATH : i64 ;
130+ /// Largest decimal exponent for a non-infinite value.
131+ ///
132+ /// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
133+ /// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
134+ const LARGEST_POWER_OF_TEN : i32 =
135+ ( ( Self :: EXPONENT_BIAS as f64 + 1.0 ) / f64:: consts:: LOG2_10 ) as i32 ;
65136
66- // Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
67- const MINIMUM_EXPONENT : i32 ;
137+ /// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
138+ /// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero.
139+ const SMALLEST_POWER_OF_TEN : i32 =
140+ -( ( ( Self :: EXPONENT_BIAS + Self :: MANTISSA_BITS + 64 ) as f64 ) / f64:: consts:: LOG2_10 ) as i32 ;
68141
69- // Largest exponent value `(1 << EXP_BITS) - 1`.
70- const INFINITE_POWER : i32 ;
142+ /// Maximum exponent for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
143+ // assuming FLT_EVAL_METHOD = 0
144+ const MAX_EXPONENT_FAST_PATH : i64 =
145+ ( ( Self :: MANTISSA_BITS as f64 ) / ( f64:: consts:: LOG2_10 - 1.0 ) ) as i64 ;
71146
72- // Index (in bits) of the sign.
73- const SIGN_INDEX : usize ;
147+ /// Minimum exponent for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
148+ const MIN_EXPONENT_FAST_PATH : i64 = - Self :: MAX_EXPONENT_FAST_PATH ;
74149
75- // Smallest decimal exponent for a non-zero value.
76- const SMALLEST_POWER_OF_TEN : i32 ;
150+ /// Maximum exponent that can be represented for a disguised-fast path case.
151+ /// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
152+ const MAX_EXPONENT_DISGUISED_FAST_PATH : i64 =
153+ Self :: MAX_EXPONENT_FAST_PATH + ( Self :: MANTISSA_BITS as f64 / f64:: consts:: LOG2_10 ) as i64 ;
77154
78- // Largest decimal exponent for a non-infinite value.
79- const LARGEST_POWER_OF_TEN : i32 ;
80-
81- // Maximum mantissa for the fast-path (`1 << 53` for f64).
82- const MAX_MANTISSA_FAST_PATH : u64 = 2_u64 << Self :: MANTISSA_EXPLICIT_BITS ;
155+ /// Maximum mantissa for the fast-path (`1 << 53` for f64).
156+ const MAX_MANTISSA_FAST_PATH : u64 = 1 << Self :: MANTISSA_BITS ;
83157
84158 /// Converts integer into float through an as cast.
85159 /// This is only called in the fast-path algorithm, and therefore
@@ -96,27 +170,45 @@ pub trait RawFloat:
96170 /// Returns the category that this number falls into.
97171 fn classify ( self ) -> FpCategory ;
98172
173+ /// Transmute to the integer representation
174+ fn to_bits ( self ) -> Self :: Int ;
175+
99176 /// Returns the mantissa, exponent and sign as integers.
100- fn integer_decode ( self ) -> ( u64 , i16 , i8 ) ;
177+ ///
178+ /// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
179+ /// For 0, the exponent will be `-(EXPONENT_BIAS + MANTISSA_EXPLICIT_BITS)`, which is the
180+ /// minimum subnormal power.
///
/// The sign `s` is `1` when the top bit of the representation is clear and `-1` otherwise.
181+ fn integer_decode ( self ) -> ( u64 , i16 , i8 ) {
182+ let bits = self . to_bits ( ) ;
183+ let sign: i8 = if bits >> ( Self :: BITS - 1 ) == Self :: Int :: ZERO { 1 } else { -1 } ; // only the topmost bit survives the shift
184+ let mut exponent: i16 = // still biased at this point; the bias is removed below
185+ ( ( bits & Self :: EXPONENT_MASK ) >> Self :: MANTISSA_EXPLICIT_BITS ) . cast ( ) ;
186+ let mantissa = if exponent == 0 {
187+ ( bits & Self :: MANTISSA_MASK ) << 1 // exponent field is zero: no hidden bit is added, the stored bits are doubled instead
188+ } else {
189+ ( bits & Self :: MANTISSA_MASK ) | ( Self :: Int :: ONE << Self :: MANTISSA_EXPLICIT_BITS ) // set the hidden bit just above the stored mantissa bits
190+ } ;
191+ // Exponent bias + mantissa shift
192+ exponent -= ( Self :: EXPONENT_BIAS + Self :: MANTISSA_EXPLICIT_BITS ) as i16 ;
193+ ( mantissa. into ( ) , exponent, sign) // `into()` widens `Self::Int` to `u64` (guaranteed by the `Into<u64>` bound on `Int`)
194+ }
101195}
102196
103197impl RawFloat for f32 {
198+ type Int = u32 ;
199+
104200 const INFINITY : Self = f32:: INFINITY ;
105201 const NEG_INFINITY : Self = f32:: NEG_INFINITY ;
106202 const NAN : Self = f32:: NAN ;
107203 const NEG_NAN : Self = -f32:: NAN ;
108204
109- const MANTISSA_EXPLICIT_BITS : usize = 23 ;
205+ const BITS : u32 = 32 ;
206+ const MANTISSA_BITS : u32 = Self :: MANTISSA_DIGITS ;
207+ const EXPONENT_MASK : Self :: Int = Self :: EXP_MASK ;
208+ const MANTISSA_MASK : Self :: Int = Self :: MAN_MASK ;
209+
110210 const MIN_EXPONENT_ROUND_TO_EVEN : i32 = -17 ;
111211 const MAX_EXPONENT_ROUND_TO_EVEN : i32 = 10 ;
112- const MIN_EXPONENT_FAST_PATH : i64 = -10 ; // assuming FLT_EVAL_METHOD = 0
113- const MAX_EXPONENT_FAST_PATH : i64 = 10 ;
114- const MAX_EXPONENT_DISGUISED_FAST_PATH : i64 = 17 ;
115- const MINIMUM_EXPONENT : i32 = -127 ;
116- const INFINITE_POWER : i32 = 0xFF ;
117- const SIGN_INDEX : usize = 31 ;
118- const SMALLEST_POWER_OF_TEN : i32 = -65 ;
119- const LARGEST_POWER_OF_TEN : i32 = 38 ;
120212
121213 #[ inline]
122214 fn from_u64 ( v : u64 ) -> Self {
@@ -136,16 +228,8 @@ impl RawFloat for f32 {
136228 TABLE [ exponent & 15 ]
137229 }
138230
139- /// Returns the mantissa, exponent and sign as integers.
140- fn integer_decode ( self ) -> ( u64 , i16 , i8 ) {
141- let bits = self . to_bits ( ) ;
142- let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 } ;
143- let mut exponent: i16 = ( ( bits >> 23 ) & 0xff ) as i16 ;
144- let mantissa =
145- if exponent == 0 { ( bits & 0x7fffff ) << 1 } else { ( bits & 0x7fffff ) | 0x800000 } ;
146- // Exponent bias + mantissa shift
147- exponent -= 127 + 23 ;
148- ( mantissa as u64 , exponent, sign)
// Returns the raw bit pattern of the float as `Self::Int` (`u32` in this impl).
231+ fn to_bits ( self ) -> Self :: Int {
232+ self . to_bits ( ) // inherent methods win over trait methods, so this calls `f32::to_bits` and does not recurse
149233 }
150234
151235 fn classify ( self ) -> FpCategory {
@@ -154,22 +238,20 @@ impl RawFloat for f32 {
154238}
155239
156240impl RawFloat for f64 {
157- const INFINITY : Self = f64:: INFINITY ;
158- const NEG_INFINITY : Self = f64:: NEG_INFINITY ;
159- const NAN : Self = f64:: NAN ;
160- const NEG_NAN : Self = -f64:: NAN ;
241+ type Int = u64 ;
242+
243+ const INFINITY : Self = Self :: INFINITY ;
244+ const NEG_INFINITY : Self = Self :: NEG_INFINITY ;
245+ const NAN : Self = Self :: NAN ;
246+ const NEG_NAN : Self = -Self :: NAN ;
247+
248+ const BITS : u32 = 64 ;
249+ const MANTISSA_BITS : u32 = Self :: MANTISSA_DIGITS ;
250+ const EXPONENT_MASK : Self :: Int = Self :: EXP_MASK ;
251+ const MANTISSA_MASK : Self :: Int = Self :: MAN_MASK ;
161252
162- const MANTISSA_EXPLICIT_BITS : usize = 52 ;
163253 const MIN_EXPONENT_ROUND_TO_EVEN : i32 = -4 ;
164254 const MAX_EXPONENT_ROUND_TO_EVEN : i32 = 23 ;
165- const MIN_EXPONENT_FAST_PATH : i64 = -22 ; // assuming FLT_EVAL_METHOD = 0
166- const MAX_EXPONENT_FAST_PATH : i64 = 22 ;
167- const MAX_EXPONENT_DISGUISED_FAST_PATH : i64 = 37 ;
168- const MINIMUM_EXPONENT : i32 = -1023 ;
169- const INFINITE_POWER : i32 = 0x7FF ;
170- const SIGN_INDEX : usize = 63 ;
171- const SMALLEST_POWER_OF_TEN : i32 = -342 ;
172- const LARGEST_POWER_OF_TEN : i32 = 308 ;
173255
174256 #[ inline]
175257 fn from_u64 ( v : u64 ) -> Self {
@@ -190,19 +272,8 @@ impl RawFloat for f64 {
190272 TABLE [ exponent & 31 ]
191273 }
192274
193- /// Returns the mantissa, exponent and sign as integers.
194- fn integer_decode ( self ) -> ( u64 , i16 , i8 ) {
195- let bits = self . to_bits ( ) ;
196- let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 } ;
197- let mut exponent: i16 = ( ( bits >> 52 ) & 0x7ff ) as i16 ;
198- let mantissa = if exponent == 0 {
199- ( bits & 0xfffffffffffff ) << 1
200- } else {
201- ( bits & 0xfffffffffffff ) | 0x10000000000000
202- } ;
203- // Exponent bias + mantissa shift
204- exponent -= 1023 + 52 ;
205- ( mantissa, exponent, sign)
// Returns the raw bit pattern of the float as `Self::Int` (`u64` in this impl).
275+ fn to_bits ( self ) -> Self :: Int {
276+ self . to_bits ( ) // inherent methods win over trait methods, so this calls `f64::to_bits` and does not recurse
206277 }
207278
208279 fn classify ( self ) -> FpCategory {
0 commit comments