@@ -192,7 +192,8 @@ macro_rules! impl_Debug {
192192}
193193
194194// 2 digit decimal look up table
195- static DEC_DIGITS_LUT : & [ u8 ; 200 ] = b"0001020304050607080910111213141516171819\
195+ static DEC_DIGITS_LUT : & [ u8 ; 200 ] = b"\
196+ 0001020304050607080910111213141516171819\
196197 2021222324252627282930313233343536373839\
197198 4041424344454647484950515253545556575859\
198199 6061626364656667686970717273747576777879\
@@ -232,83 +233,89 @@ macro_rules! impl_Display {
232233
233234 #[ cfg( not( feature = "optimize_for_size" ) ) ]
234235 impl $unsigned {
235- fn _fmt( mut self , is_nonnegative: bool , f: & mut fmt:: Formatter <' _>) -> fmt:: Result {
236- const SIZE : usize = $unsigned:: MAX . ilog( 10 ) as usize + 1 ;
237- let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; SIZE ] ;
238- let mut curr = SIZE ;
239- let buf_ptr = MaybeUninit :: slice_as_mut_ptr( & mut buf) ;
240- let lut_ptr = DEC_DIGITS_LUT . as_ptr( ) ;
241-
242- // SAFETY: Since `d1` and `d2` are always less than or equal to `198`, we
243- // can copy from `lut_ptr[d1..d1 + 1]` and `lut_ptr[d2..d2 + 1]`. To show
244- // that it's OK to copy into `buf_ptr`, notice that at the beginning
245- // `curr == buf.len() == 39 > log(n)` since `n < 2^128 < 10^39`, and at
246- // each step this is kept the same as `n` is divided. Since `n` is always
247- // non-negative, this means that `curr > 0` so `buf_ptr[curr..curr + 1]`
248- // is safe to access.
249- unsafe {
250- // need at least 16 bits for the 4-characters-at-a-time to work.
251- #[ allow( overflowing_literals) ]
252- #[ allow( unused_comparisons) ]
253- // This block will be removed for smaller types at compile time and in the worst
254- // case, it will prevent to have the `10000` literal to overflow for `i8` and `u8`.
255- if core:: mem:: size_of:: <$unsigned>( ) >= 2 {
256- // eagerly decode 4 characters at a time
257- while self >= 10000 {
258- let rem = ( self % 10000 ) as usize ;
259- self /= 10000 ;
260-
261- let d1 = ( rem / 100 ) << 1 ;
262- let d2 = ( rem % 100 ) << 1 ;
263- curr -= 4 ;
264-
265- // We are allowed to copy to `buf_ptr[curr..curr + 3]` here since
266- // otherwise `curr < 0`. But then `n` was originally at least `10000^10`
267- // which is `10^40 > 2^128 > n`.
268- ptr:: copy_nonoverlapping( lut_ptr. add( d1 as usize ) , buf_ptr. add( curr) , 2 ) ;
269- ptr:: copy_nonoverlapping( lut_ptr. add( d2 as usize ) , buf_ptr. add( curr + 2 ) , 2 ) ;
270- }
271- }
272-
273- // if we reach here numbers are <= 9999, so at most 4 chars long
274- let mut n = self as usize ; // possibly reduce 64bit math
236+ fn _fmt( self , is_nonnegative: bool , f: & mut fmt:: Formatter <' _>) -> fmt:: Result {
237+ const MAX_DEC_N : usize = $unsigned:: MAX . ilog( 10 ) as usize + 1 ;
238+ // Buffer decimals for $unsigned with right alignment.
239+ let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; MAX_DEC_N ] ;
240+ // Count the number of bytes in buf that are not initialized.
241+ let mut offset = buf. len( ) ;
242+ // Consume the least-significant decimals from a working copy.
243+ let mut remain = self ;
244+
245+ // Format per four digits from the lookup table.
246+ // Four digits need a 16-bit $unsigned or wider.
247+ while size_of:: <Self >( ) > 1 && remain > 999 . try_into( ) . expect( "branch is not hit for types that cannot fit 999 (u8)" ) {
248+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
249+ // and the while condition ensures at least 4 more decimals.
250+ unsafe { core:: hint:: assert_unchecked( offset >= 4 ) }
251+ // SAFETY: The offset counts down from its initial buf.len()
252+ // without underflow due to the previous precondition.
253+ unsafe { core:: hint:: assert_unchecked( offset <= buf. len( ) ) }
254+ offset -= 4 ;
255+
256+ // pull two pairs
257+ let scale: Self = 1_00_00 . try_into( ) . expect( "branch is not hit for types that cannot fit 1E4 (u8)" ) ;
258+ let quad = remain % scale;
259+ remain /= scale;
260+ let pair1 = ( quad / 100 ) as usize ;
261+ let pair2 = ( quad % 100 ) as usize ;
262+ buf[ offset + 0 ] . write( DEC_DIGITS_LUT [ pair1 * 2 + 0 ] ) ;
263+ buf[ offset + 1 ] . write( DEC_DIGITS_LUT [ pair1 * 2 + 1 ] ) ;
264+ buf[ offset + 2 ] . write( DEC_DIGITS_LUT [ pair2 * 2 + 0 ] ) ;
265+ buf[ offset + 3 ] . write( DEC_DIGITS_LUT [ pair2 * 2 + 1 ] ) ;
266+ }
275267
276- // decode 2 more chars, if > 2 chars
277- if n >= 100 {
278- let d1 = ( n % 100 ) << 1 ;
279- n /= 100 ;
280- curr -= 2 ;
281- ptr:: copy_nonoverlapping( lut_ptr. add( d1) , buf_ptr. add( curr) , 2 ) ;
282- }
268+ // Format per two digits from the lookup table.
269+ if remain > 9 {
270+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
271+ // and the while condition ensures at least 2 more decimals.
272+ unsafe { core:: hint:: assert_unchecked( offset >= 2 ) }
273+ // SAFETY: The offset counts down from its initial buf.len()
274+ // without underflow due to the previous precondition.
275+ unsafe { core:: hint:: assert_unchecked( offset <= buf. len( ) ) }
276+ offset -= 2 ;
277+
278+ let pair = ( remain % 100 ) as usize ;
279+ remain /= 100 ;
280+ buf[ offset + 0 ] . write( DEC_DIGITS_LUT [ pair * 2 + 0 ] ) ;
281+ buf[ offset + 1 ] . write( DEC_DIGITS_LUT [ pair * 2 + 1 ] ) ;
282+ }
283283
284- // if we reach here numbers are <= 100, so at most 2 chars long
285- // The biggest it can be is 99, and 99 << 1 == 198, so a `u8` is enough.
286- // decode last 1 or 2 chars
287- if n < 10 {
288- curr -= 1 ;
289- * buf_ptr. add( curr) = ( n as u8 ) + b'0' ;
290- } else {
291- let d1 = n << 1 ;
292- curr -= 2 ;
293- ptr:: copy_nonoverlapping( lut_ptr. add( d1) , buf_ptr. add( curr) , 2 ) ;
294- }
284+ // Format the last remaining digit, if any.
285+ if remain != 0 || self == 0 {
286+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
287+ // and the if condition ensures (at least) 1 more decimals.
288+ unsafe { core:: hint:: assert_unchecked( offset >= 1 ) }
289+ // SAFETY: The offset counts down from its initial buf.len()
290+ // without underflow due to the previous precondition.
291+ unsafe { core:: hint:: assert_unchecked( offset <= buf. len( ) ) }
292+ offset -= 1 ;
293+
294+ // Either the compiler sees that remain < 10, or it prevents
295+ // a boundary check up next.
296+ let last = ( remain & 15 ) as usize ;
297+ buf[ offset] . write( DEC_DIGITS_LUT [ last * 2 + 1 ] ) ;
298+ // not used: remain = 0;
295299 }
296300
297- // SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid
298- // UTF-8 since `DEC_DIGITS_LUT` is
299- let buf_slice = unsafe {
300- str :: from_utf8_unchecked(
301- slice:: from_raw_parts( buf_ptr. add( curr) , buf. len( ) - curr) )
301+ // SAFETY: All buf content since offset is set.
302+ let written = unsafe { buf. get_unchecked( offset..) } ;
303+ // SAFETY: Writes use ASCII from the lookup table exclusively.
304+ let as_str = unsafe {
305+ str :: from_utf8_unchecked( slice:: from_raw_parts(
306+ MaybeUninit :: slice_as_ptr( written) ,
307+ written. len( ) ,
308+ ) )
302309 } ;
303- f. pad_integral( is_nonnegative, "" , buf_slice )
310+ f. pad_integral( is_nonnegative, "" , as_str )
304311 }
305312 } ) *
306313
307314 #[ cfg( feature = "optimize_for_size" ) ]
308315 fn $gen_name( mut n: $u, is_nonnegative: bool , f: & mut fmt:: Formatter <' _>) -> fmt:: Result {
309- const SIZE : usize = $u:: MAX . ilog( 10 ) as usize + 1 ;
310- let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; SIZE ] ;
311- let mut curr = buf . len ( ) ;
316+ const MAX_DEC_N : usize = $u:: MAX . ilog( 10 ) as usize + 1 ;
317+ let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; MAX_DEC_N ] ;
318+ let mut curr = MAX_DEC_N ;
312319 let buf_ptr = MaybeUninit :: slice_as_mut_ptr( & mut buf) ;
313320
314321 // SAFETY: To show that it's OK to copy into `buf_ptr`, notice that at the beginning
0 commit comments