@@ -1158,24 +1158,27 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
11581158 offset += 1 ;
11591159 } else {
11601160 // Ascii case, try to skip forward quickly.
1161+ // When the pointer is aligned, read 2 words of data per iteration
1162+ // until we find a word containing a non-ascii byte.
1163+ const BYTES_PER_ITERATION : usize = 2 * usize:: BYTES ;
11611164 let ptr = v. as_ptr ( ) ;
11621165 let align = ( ptr as usize + offset) & ( usize:: BYTES - 1 ) ;
11631166 if align == 0 {
1164- // When the pointer is aligned, read 2 words of data per iteration
1165- // until we find a word containing a non-ascii byte.
1166- while offset <= len - 2 * usize :: BYTES {
1167- unsafe {
1168- let u = * ( ptr. offset ( offset as isize ) as * const usize ) ;
1169- let v = * ( ptr . offset ( ( offset + usize :: BYTES ) as isize ) as * const usize ) ;
1170-
1171- // break if there is a nonascii byte
1172- let zu = contains_nonascii ( u ) ;
1173- let zv = contains_nonascii ( v ) ;
1174- if zu || zv {
1175- break ;
1167+ if len >= BYTES_PER_ITERATION {
1168+ while offset <= len - BYTES_PER_ITERATION {
1169+ unsafe {
1170+ let u = * ( ptr . offset ( offset as isize ) as * const usize ) ;
1171+ let v = * ( ptr. offset ( ( offset + usize :: BYTES ) as isize ) as * const usize ) ;
1172+
1173+ // break if there is a nonascii byte
1174+ let zu = contains_nonascii ( u ) ;
1175+ let zv = contains_nonascii ( v ) ;
1176+ if zu || zv {
1177+ break ;
1178+ }
11761179 }
1180+ offset += BYTES_PER_ITERATION ;
11771181 }
1178- offset += usize:: BYTES * 2 ;
11791182 }
11801183 // step from the point where the wordwise loop stopped
11811184 while offset < len && v[ offset] < 128 {
0 commit comments