22
33use super :: Utf8Error ;
44use crate :: intrinsics:: { const_eval_select, unlikely} ;
5+ use crate :: mem;
56
67/// Returns the initial codepoint accumulator for the first byte.
78/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
@@ -163,6 +164,13 @@ const ST_ERROR: u32 = 0 * BITS_PER_STATE as u32;
163164#[ allow( clippy:: all) ]
164165const ST_ACCEPT : u32 = 1 * BITS_PER_STATE as u32 ;
165166
167+ /// Whether the target platform lacks an efficient 64-bit shift and should use the 32-bit shift fallback.
168+ const USE_SHIFT32 : bool = cfg ! ( all(
169+ any( target_pointer_width = "16" , target_pointer_width = "32" ) ,
170+ // WASM32 supports 64-bit shift.
171+ not( target_arch = "wasm32" ) ,
172+ ) ) ;
173+
166174// After storing STATE_CNT * BITS_PER_STATE = 54bits on 64-bit platform, or (STATE_CNT - 5)
167175// * BITS_PER_STATE = 24bits on 32-bit platform, we still have some high bits left.
168176// They will never be used via state transition.
@@ -218,13 +226,12 @@ static TRANS_TABLE: [u64; 256] = {
218226
219227 // On platforms without 64-bit shift, align states 5..10 to 32-bit boundary.
220228 // See docs above for details.
221- let need_align = cfg ! ( any( target_pointer_width = "16" , target_pointer_width = "32" ) ) ;
222229 let mut bits = 0u64 ;
223230 let mut j = 0 ;
224231 while j < to. len ( ) {
225232 let to_off =
226- to[ j] * BITS_PER_STATE as u64 + if need_align && to[ j] >= 5 { 2 } else { 0 } ;
227- let off = j as u32 * BITS_PER_STATE + if need_align && j >= 5 { 2 } else { 0 } ;
233+ to[ j] * BITS_PER_STATE as u64 + if USE_SHIFT32 && to[ j] >= 5 { 2 } else { 0 } ;
234+ let off = j as u32 * BITS_PER_STATE + if USE_SHIFT32 && j >= 5 { 2 } else { 0 } ;
228235 bits |= to_off << off;
229236 j += 1 ;
230237 }
@@ -244,20 +251,17 @@ static TRANS_TABLE: [u64; 256] = {
244251 table
245252} ;
246253
247- #[ cfg( not( any( target_pointer_width = "16" , target_pointer_width = "32" ) ) ) ]
248254#[ inline( always) ]
249255const fn next_state ( st : u32 , byte : u8 ) -> u32 {
250- TRANS_TABLE [ byte as usize ] . wrapping_shr ( st as _ ) as _
251- }
252-
253- #[ cfg( any( target_pointer_width = "16" , target_pointer_width = "32" ) ) ]
254- #[ inline( always) ]
255- const fn next_state ( st : u32 , byte : u8 ) -> u32 {
256- // SAFETY: `u64` is more aligned than `u32`, and has the same repr as `[u32; 2]`.
257- let [ lo, hi] = unsafe { crate :: mem:: transmute :: < u64 , [ u32 ; 2 ] > ( TRANS_TABLE [ byte as usize ] ) } ;
258- #[ cfg( target_endian = "big" ) ]
259- let ( lo, hi) = ( hi, lo) ;
260- if st & 32 == 0 { lo } else { hi } . wrapping_shr ( st)
256+ if USE_SHIFT32 {
257+ // SAFETY: `u64` is more aligned than `u32`, and has the same repr as `[u32; 2]`.
258+ let [ lo, hi] = unsafe { mem:: transmute :: < u64 , [ u32 ; 2 ] > ( TRANS_TABLE [ byte as usize ] ) } ;
259+ #[ cfg( target_endian = "big" ) ]
260+ let ( lo, hi) = ( hi, lo) ;
261+ if st & 32 == 0 { lo } else { hi } . wrapping_shr ( st)
262+ } else {
263+ TRANS_TABLE [ byte as usize ] . wrapping_shr ( st as _ ) as _
264+ }
261265}
262266
263267/// Check if `byte` is a valid UTF-8 first byte, assuming it must be a valid first or
0 commit comments