33use core:: ascii:: EscapeDefault ;
44
55use crate :: fmt:: { self , Write } ;
6- use crate :: { ascii, iter, mem , ops} ;
6+ use crate :: { ascii, iter, ops} ;
77
88#[ cfg( not( test) ) ]
99impl [ u8 ] {
@@ -297,14 +297,6 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
297297 }
298298}
299299
300- /// Returns `true` if any byte in the word `v` is non-ASCII (>= 128), i.e. has its high bit set. Adapted
301- /// from `../str/mod.rs`, which does something similar for UTF-8 validation.
302- #[ inline]
303- const fn contains_nonascii ( v : usize ) -> bool {
304- const NONASCII_MASK : usize = usize:: repeat_u8 ( 0x80 ) ; // presumably 0x80 replicated into every byte of the word -- confirm against `usize::repeat_u8`
305- ( NONASCII_MASK & v) != 0
306- }
307-
308300/// ASCII test *without* the chunk-at-a-time optimizations.
309301///
310302/// This is carefully structured to produce nice small code -- it's smaller in
@@ -335,9 +327,17 @@ pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
335327///
336328/// If any of these loads produces something for which `contains_nonascii`
337329/// (nested inside this function) returns true, then we know the answer is false.
330+ #[ cfg( not( all( target_arch = "x86_64" , target_feature = "sse2" ) ) ) ]
338331#[ inline]
339332const fn is_ascii ( s : & [ u8 ] ) -> bool {
340- const USIZE_SIZE : usize = mem:: size_of :: < usize > ( ) ;
333+ /// Returns `true` if any byte in the word `v` is non-ASCII (>= 128), i.e. has its high bit set. Adapted
334+ /// from `../str/mod.rs`, which does something similar for UTF-8 validation.
335+ const fn contains_nonascii ( v : usize ) -> bool {
336+ const NONASCII_MASK : usize = usize:: repeat_u8 ( 0x80 ) ; // presumably 0x80 replicated into every byte of the word -- confirm against `usize::repeat_u8`
337+ ( NONASCII_MASK & v) != 0
338+ }
339+
340+ const USIZE_SIZE : usize = size_of :: < usize > ( ) ;
341341
342342 let len = s. len ( ) ;
343343 let align_offset = s. as_ptr ( ) . align_offset ( USIZE_SIZE ) ;
@@ -347,7 +347,7 @@ const fn is_ascii(s: &[u8]) -> bool {
347347 //
348348 // We also do this for architectures where `size_of::<usize>()` isn't
349349 // sufficient alignment for `usize`, because it's a weird edge case.
350- if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem :: align_of :: < usize > ( ) {
350+ if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < align_of :: < usize > ( ) {
351351 return is_ascii_simple ( s) ;
352352 }
353353
@@ -381,7 +381,7 @@ const fn is_ascii(s: &[u8]) -> bool {
381381 // have alignment information it should have given a `usize::MAX` for
382382 // `align_offset` earlier, sending things through the scalar path instead of
383383 // this one, so this check should pass if it's reachable.
384- debug_assert ! ( word_ptr. is_aligned_to( mem :: align_of:: <usize >( ) ) ) ;
384+ debug_assert ! ( word_ptr. is_aligned_to( align_of:: <usize >( ) ) ) ;
385385
386386 // Read subsequent words until the last aligned word, excluding the last
387387 // aligned word by itself to be done in tail check later, to ensure that
@@ -420,3 +420,48 @@ const fn is_ascii(s: &[u8]) -> bool {
420420
421421 !contains_nonascii ( last_word)
422422}
423+
424+ /// ASCII test optimized to use the `pmovmskb` instruction available on `x86-64`
425+ /// platforms. Returns `true` only if every byte of `bytes` is ASCII; an empty slice yields `true`.
426+ ///
427+ /// Other platforms are not likely to benefit from this code structure, so they
428+ /// use SWAR techniques to test for ASCII in `usize`-sized chunks.
429+ #[ cfg( all( target_arch = "x86_64" , target_feature = "sse2" ) ) ]
430+ #[ inline]
431+ const fn is_ascii ( bytes : & [ u8 ] ) -> bool {
432+ // Process chunks of 32 bytes at a time in the fast path to enable
433+ // auto-vectorization and use of `pmovmskb`. Two 128-bit vector registers
434+ // can be OR'd together and then the resulting vector can be tested for
435+ // non-ASCII bytes.
436+ const CHUNK_SIZE : usize = 32 ;
437+
438+ let mut i = 0 ;
439+
440+ while i + CHUNK_SIZE <= bytes. len ( ) {
441+ let chunk_end = i + CHUNK_SIZE ;
442+
443+ // Get LLVM to produce a `pmovmskb` instruction on x86-64 which
444+ // creates a mask from the most significant bit of each byte.
445+ // ASCII bytes are less than 128 (0x80), so their most significant
446+ // bit is unset. The inner loop never exits early: each byte just adds 0 or 1 to `count`.
447+ let mut count = 0 ; // inferred as `u8` from the `as u8` additions and comparison below
448+ while i < chunk_end {
449+ count += bytes[ i] . is_ascii ( ) as u8 ;
450+ i += 1 ;
451+ }
452+
453+ // Every byte in the chunk is ASCII exactly when `count` equals `CHUNK_SIZE` (32 fits in a `u8`, so the cast is lossless).
454+ if count != CHUNK_SIZE as u8 {
455+ return false ;
456+ }
457+ }
458+
459+ // Process the remaining `bytes.len() % CHUNK_SIZE` bytes, folding each result in with `&=` rather than returning early.
460+ let mut is_ascii = true ;
461+ while i < bytes. len ( ) {
462+ is_ascii &= bytes[ i] . is_ascii ( ) ;
463+ i += 1 ;
464+ }
465+
466+ is_ascii
467+ }
0 commit comments