@@ -135,7 +135,11 @@ where
135135// and it becomes free on modern ISAs, including x86, x86_64 and ARM.
136136//
137137// ```
138- // // shrx state, qword ptr [table_addr + 8 * byte], state # On x86-64-v3
138+ // // On x86-64-v3: (more instructions on ordinary x86_64 but with same cycles-per-byte)
139+ // // shrx state, qword ptr [TRANS_TABLE + 4 * byte], state
140+ // // On aarch64/ARMv8:
141+ // // ldr temp, [TRANS_TABLE, byte, lsl 2]
142+ // // lsr state, temp, state
139143// state = TRANS_TABLE[byte].wrapping_shr(state);
140144// ```
141145//
@@ -290,27 +294,28 @@ fn run_utf8_validation_rt(bytes: &[u8]) -> Result<(), Utf8Error> {
290294 // SAFETY: Start at initial state ACCEPT.
291295 let mut st = unsafe { run_with_error_handling ( ST_ACCEPT , & bytes[ ..i] , 0 ) ? } ;
292296
293- while i + MAIN_CHUNK_SIZE <= bytes. len ( ) {
297+ while i < bytes. len ( ) {
294298 // Fast path: if the current state is ACCEPT, we can skip to the next non-ASCII chunk.
295299 // We also do a quick inspection of the first byte to avoid getting into this path at all
296300 // when handling strings with almost no ASCII, e.g. Chinese scripts.
297301 // SAFETY: `i` is in bound.
298- if st == ST_ACCEPT && unsafe { * bytes. get_unchecked ( i) } < 0x80 {
302+ if st == ST_ACCEPT && unsafe { bytes. get_unchecked ( i) . is_ascii ( ) } {
299303 // SAFETY: `i` is in bound.
300304 let rest = unsafe { bytes. get_unchecked ( i..) } ;
301305 let mut ascii_chunks = rest. array_chunks :: < ASCII_CHUNK_SIZE > ( ) ;
302306 let ascii_rest_chunk_cnt = ascii_chunks. len ( ) ;
303307 let pos = ascii_chunks
304308 . position ( |chunk| {
305- // NB. Always traverse the whole chunk to enable vectorization, instead of `.any()`.
306- // LLVM will be fear of memory traps and fallback if loop has short-circuit.
309+ // NB. Always traverse the whole chunk instead of `.all()`, to persuade LLVM to
310+ // vectorize this check.
311+ // We also do not use `<[u8]>::is_ascii` which is unnecessarily complex here.
307312 #[ expect( clippy:: unnecessary_fold) ]
308- let has_non_ascii = chunk. iter ( ) . fold ( false , |acc, & b| acc || ( b >= 0x80 ) ) ;
309- has_non_ascii
313+ let all_ascii = chunk. iter ( ) . fold ( true , |acc, b| acc && b . is_ascii ( ) ) ;
314+ !all_ascii
310315 } )
311316 . unwrap_or ( ascii_rest_chunk_cnt) ;
312317 i += pos * ASCII_CHUNK_SIZE ;
313- if i + MAIN_CHUNK_SIZE > bytes. len ( ) {
318+ if i >= bytes. len ( ) {
314319 break ;
315320 }
316321 }
0 commit comments