33use core:: ascii:: EscapeDefault ;
44
55use crate :: fmt:: { self , Write } ;
6+ #[ cfg( not( all( target_arch = "x86_64" , target_feature = "sse2" ) ) ) ]
67use crate :: intrinsics:: const_eval_select;
7- use crate :: { ascii, iter, mem , ops} ;
8+ use crate :: { ascii, iter, ops} ;
89
910#[ cfg( not( test) ) ]
1011impl [ u8 ] {
@@ -308,14 +309,6 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
308309 }
309310}
310311
311- /// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
312- /// from `../str/mod.rs`, which does something similar for utf8 validation.
313- #[ inline]
314- const fn contains_nonascii ( v : usize ) -> bool {
315- const NONASCII_MASK : usize = usize:: repeat_u8 ( 0x80 ) ;
316- ( NONASCII_MASK & v) != 0
317- }
318-
319312/// ASCII test *without* the chunk-at-a-time optimizations.
320313///
321314/// This is carefully structured to produce nice small code -- it's smaller in
@@ -346,6 +339,7 @@ pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
346339///
347340/// If any of these loads produces something for which `contains_nonascii`
348341/// (defined inside the function body below) returns true, then we know the answer is false.
342+ #[ cfg( not( all( target_arch = "x86_64" , target_feature = "sse2" ) ) ) ]
349343#[ inline]
350344#[ rustc_allow_const_fn_unstable( const_eval_select) ] // fallback impl has same behavior
351345const fn is_ascii ( s : & [ u8 ] ) -> bool {
@@ -356,7 +350,14 @@ const fn is_ascii(s: &[u8]) -> bool {
356350 if const {
357351 is_ascii_simple( s)
358352 } else {
359- const USIZE_SIZE : usize = mem:: size_of:: <usize >( ) ;
353+ /// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
354+ /// from `../str/mod.rs`, which does something similar for utf8 validation.
355+ const fn contains_nonascii( v: usize ) -> bool {
356+ const NONASCII_MASK : usize = usize :: repeat_u8( 0x80 ) ;
357+ ( NONASCII_MASK & v) != 0
358+ }
359+
360+ const USIZE_SIZE : usize = size_of:: <usize >( ) ;
360361
361362 let len = s. len( ) ;
362363 let align_offset = s. as_ptr( ) . align_offset( USIZE_SIZE ) ;
@@ -366,7 +367,7 @@ const fn is_ascii(s: &[u8]) -> bool {
366367 //
367368 // We also do this for architectures where `size_of::<usize>()` isn't
368369 // sufficient alignment for `usize`, because it's a weird edge case.
369- if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem :: align_of:: <usize >( ) {
370+ if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < align_of:: <usize >( ) {
370371 return is_ascii_simple( s) ;
371372 }
372373
@@ -400,7 +401,7 @@ const fn is_ascii(s: &[u8]) -> bool {
400401 // have alignment information it should have given a `usize::MAX` for
401402 // `align_offset` earlier, sending things through the scalar path instead of
402403 // this one, so this check should pass if it's reachable.
403- debug_assert!( word_ptr. is_aligned_to( mem :: align_of:: <usize >( ) ) ) ;
404+ debug_assert!( word_ptr. is_aligned_to( align_of:: <usize >( ) ) ) ;
404405
405406 // Read subsequent words until the last aligned word, excluding the last
406407 // aligned word by itself to be done in tail check later, to ensure that
@@ -435,3 +436,48 @@ const fn is_ascii(s: &[u8]) -> bool {
435436 }
436437 )
437438}
439+
/// Checks whether every byte of `bytes` is ASCII, using a loop shape that is
/// meant to let LLVM emit the `pmovmskb` instruction on `x86-64` with SSE2.
///
/// The slice is scanned in fixed 32-byte blocks followed by a shorter tail.
/// Other platforms are not likely to benefit from this code structure, so they
/// use SWAR techniques to test for ASCII in `usize`-sized chunks instead.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
#[inline]
const fn is_ascii(bytes: &[u8]) -> bool {
    // The fast path handles 32 bytes per step to enable auto-vectorization:
    // two 128-bit vector registers can be OR'd together and the combined
    // vector tested for non-ASCII bytes in one go.
    const CHUNK_SIZE: usize = 32;

    let len = bytes.len();
    let mut pos = 0;

    loop {
        let block_end = pos + CHUNK_SIZE;
        if block_end > len {
            // Fewer than `CHUNK_SIZE` bytes are left; finish in the tail loop.
            break;
        }

        // Tally the ASCII bytes of this block without branching per byte.
        // An ASCII byte is below 128 (0x80), i.e. its most significant bit is
        // unset, which is the bit `pmovmskb` gathers into a mask.
        let mut ascii_in_block: u8 = 0;
        while pos < block_end {
            ascii_in_block += bytes[pos].is_ascii() as u8;
            pos += 1;
        }

        // A tally short of the block size means some byte was >= 128.
        if ascii_in_block != CHUNK_SIZE as u8 {
            return false;
        }
    }

    // Handle the trailing `bytes.len() % CHUNK_SIZE` bytes, accumulating the
    // verdict instead of exiting early.
    let mut tail_is_ascii = true;
    while pos < len {
        tail_is_ascii &= bytes[pos].is_ascii();
        pos += 1;
    }

    tail_is_ascii
}
0 commit comments