@@ -192,7 +192,8 @@ macro_rules! impl_Debug {
192192}
193193
194194// 2 digit decimal look up table
195- static DEC_DIGITS_LUT : & [ u8 ; 200 ] = b"0001020304050607080910111213141516171819\
195+ static DEC_DIGITS_LUT : & [ u8 ; 200 ] = b"\
196+ 0001020304050607080910111213141516171819\
196197 2021222324252627282930313233343536373839\
197198 4041424344454647484950515253545556575859\
198199 6061626364656667686970717273747576777879\
@@ -232,83 +233,68 @@ macro_rules! impl_Display {
232233
233234 #[ cfg( not( feature = "optimize_for_size" ) ) ]
234235 impl $unsigned {
235- fn _fmt( mut self , is_nonnegative: bool , f: & mut fmt:: Formatter <' _>) -> fmt:: Result {
236- const SIZE : usize = $unsigned:: MAX . ilog( 10 ) as usize + 1 ;
237- let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; SIZE ] ;
238- let mut curr = SIZE ;
239- let buf_ptr = MaybeUninit :: slice_as_mut_ptr( & mut buf) ;
240- let lut_ptr = DEC_DIGITS_LUT . as_ptr( ) ;
241-
242- // SAFETY: Since `d1` and `d2` are always less than or equal to `198`, we
243- // can copy from `lut_ptr[d1..d1 + 1]` and `lut_ptr[d2..d2 + 1]`. To show
244- // that it's OK to copy into `buf_ptr`, notice that at the beginning
245- // `curr == buf.len() == 39 > log(n)` since `n < 2^128 < 10^39`, and at
246- // each step this is kept the same as `n` is divided. Since `n` is always
247- // non-negative, this means that `curr > 0` so `buf_ptr[curr..curr + 1]`
248- // is safe to access.
249- unsafe {
250- // need at least 16 bits for the 4-characters-at-a-time to work.
251- #[ allow( overflowing_literals) ]
252- #[ allow( unused_comparisons) ]
253- // This block will be removed for smaller types at compile time and in the worst
254- // case, it will prevent to have the `10000` literal to overflow for `i8` and `u8`.
255- if core:: mem:: size_of:: <$unsigned>( ) >= 2 {
256- // eagerly decode 4 characters at a time
257- while self >= 10000 {
258- let rem = ( self % 10000 ) as usize ;
259- self /= 10000 ;
260-
261- let d1 = ( rem / 100 ) << 1 ;
262- let d2 = ( rem % 100 ) << 1 ;
263- curr -= 4 ;
264-
265- // We are allowed to copy to `buf_ptr[curr..curr + 3]` here since
266- // otherwise `curr < 0`. But then `n` was originally at least `10000^10`
267- // which is `10^40 > 2^128 > n`.
268- ptr:: copy_nonoverlapping( lut_ptr. add( d1 as usize ) , buf_ptr. add( curr) , 2 ) ;
269- ptr:: copy_nonoverlapping( lut_ptr. add( d2 as usize ) , buf_ptr. add( curr + 2 ) , 2 ) ;
270- }
271- }
236+ fn _fmt( self , is_nonnegative: bool , f: & mut fmt:: Formatter <' _>) -> fmt:: Result {
237+ // Render `self` as decimal ASCII into a local buffer sized for $unsigned::MAX, filling it
238+ // from the back so that the least significant digit always lands in the last byte of buf.
239+ const MAX_DEC_N : usize = $unsigned:: MAX . ilog( 10 ) as usize + 1 ;
240+ let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; MAX_DEC_N ] ;
241+ // Index of the first written byte in buf, i.e. the count of still-unused leading bytes.
242+ let mut offset = MAX_DEC_N ;
243+ // Working copy of the value; digits are divided off it until none are left.
244+ let mut remain = self ;
245+
246+ // Format four digits per iteration, as two two-digit pairs from the lookup table.
247+ // Four digits need a 16-bit $unsigned or wider; an 8-bit type has MAX_DEC_N == 3, so `offset >= 4` is never true there.
248+ #[ allow( overflowing_literals) ]
249+ #[ allow( unused_comparisons) ]
250+ while offset >= 4 && remain > 999 {
251+ let quad = remain % 100_00 ;
252+ remain /= 100_00 ;
253+ let p1 = ( quad / 100 ) as usize * 2 ;
254+ let p2 = ( quad % 100 ) as usize * 2 ;
255+ offset -= 4 ;
256+ buf[ offset + 0 ] . write( DEC_DIGITS_LUT [ p1 + 0 ] ) ;
257+ buf[ offset + 1 ] . write( DEC_DIGITS_LUT [ p1 + 1 ] ) ;
258+ buf[ offset + 2 ] . write( DEC_DIGITS_LUT [ p2 + 0 ] ) ;
259+ buf[ offset + 3 ] . write( DEC_DIGITS_LUT [ p2 + 1 ] ) ;
260+ }
272261
273- // if we reach here numbers are <= 9999, so at most 4 chars long
274- let mut n = self as usize ; // possibly reduce 64bit math
262+ // Format two digits per iteration from the lookup table while at least two digits remain.
263+ while offset >= 2 && remain > 9 {
264+ let p = ( remain % 100 ) as usize * 2 ;
265+ remain /= 100 ;
266+ offset -= 2 ;
267+ buf[ offset + 0 ] . write( DEC_DIGITS_LUT [ p + 0 ] ) ;
268+ buf[ offset + 1 ] . write( DEC_DIGITS_LUT [ p + 1 ] ) ;
269+ }
275270
276- // decode 2 more chars, if > 2 chars
277- if n >= 100 {
278- let d1 = ( n % 100 ) << 1 ;
279- n /= 100 ;
280- curr -= 2 ;
281- ptr:: copy_nonoverlapping( lut_ptr. add( d1) , buf_ptr. add( curr) , 2 ) ;
282- }
271+ // Format the last remaining digit, if any; `offset == MAX_DEC_N` means nothing was written yet, so a zero value still yields one digit.
272+ if offset != 0 && remain != 0 || offset == MAX_DEC_N {
273+ // remain should already be below 10 here; the `% 10` bounds p to 0..=18 regardless,
274+ // presumably so that the lookup-table index below needs no bounds check (TODO confirm in codegen).
275+ let p = ( remain % 10 ) as usize * 2 ;
276+ // not used: remain = 0;
283277
284- // if we reach here numbers are <= 100, so at most 2 chars long
285- // The biggest it can be is 99, and 99 << 1 == 198, so a `u8` is enough.
286- // decode last 1 or 2 chars
287- if n < 10 {
288- curr -= 1 ;
289- * buf_ptr. add( curr) = ( n as u8 ) + b'0' ;
290- } else {
291- let d1 = n << 1 ;
292- curr -= 2 ;
293- ptr:: copy_nonoverlapping( lut_ptr. add( d1) , buf_ptr. add( curr) , 2 ) ;
294- }
278+ offset -= 1 ;
279+ buf[ offset] . write( DEC_DIGITS_LUT [ p + 1 ] ) ; // second byte of the two-digit pair is the ones digit
295280 }
296281
297- // SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid
298- // UTF-8 since `DEC_DIGITS_LUT` is
299- let buf_slice = unsafe {
300- str :: from_utf8_unchecked(
301- slice:: from_raw_parts( buf_ptr. add( curr) , buf. len( ) - curr) )
282+ // SAFETY: Every byte of buf from offset onward has been written with a byte from
283+ // the lookup table, which consists of valid ASCII exclusively and is therefore valid UTF-8.
284+ let decimals = unsafe {
285+ let written = & buf[ offset..] ;
286+ let as_init = MaybeUninit :: slice_assume_init_ref( written) ;
287+ str :: from_utf8_unchecked( as_init)
302288 } ;
303- f. pad_integral( is_nonnegative, "" , buf_slice )
289+ f. pad_integral( is_nonnegative, "" , decimals )
304290 }
305291 } ) *
306292
307293 #[ cfg( feature = "optimize_for_size" ) ]
308294 fn $gen_name( mut n: $u, is_nonnegative: bool , f: & mut fmt:: Formatter <' _>) -> fmt:: Result {
309- const SIZE : usize = $u:: MAX . ilog( 10 ) as usize + 1 ;
310- let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; SIZE ] ;
311- let mut curr = buf . len ( ) ;
295+ const MAX_DEC_N : usize = $u:: MAX . ilog( 10 ) as usize + 1 ;
296+ let mut buf = [ MaybeUninit :: <u8 >:: uninit( ) ; MAX_DEC_N ] ;
297+ let mut curr = MAX_DEC_N ;
312298 let buf_ptr = MaybeUninit :: slice_as_mut_ptr( & mut buf) ;
313299
314300 // SAFETY: To show that it's OK to copy into `buf_ptr`, notice that at the beginning
0 commit comments