33use core:: ascii:: EscapeDefault ;
44
55use crate :: fmt:: { self , Write } ;
6+ #[ cfg( not( all( target_arch = "x86_64" , target_feature = "sse2" ) ) ) ]
67use crate :: intrinsics:: const_eval_select;
7- use crate :: { ascii, iter, mem , ops} ;
8+ use crate :: { ascii, iter, ops} ;
89
910#[ cfg( not( test) ) ]
1011impl [ u8 ] {
@@ -328,14 +329,6 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
328329 }
329330}
330331
331- /// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
332- /// from `../str/mod.rs`, which does something similar for utf8 validation.
333- #[ inline]
334- const fn contains_nonascii ( v : usize ) -> bool {
335- const NONASCII_MASK : usize = usize:: repeat_u8 ( 0x80 ) ;
336- ( NONASCII_MASK & v) != 0
337- }
338-
339332/// ASCII test *without* the chunk-at-a-time optimizations.
340333///
341334/// This is carefully structured to produce nice small code -- it's smaller in
@@ -366,6 +359,7 @@ pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
366359///
367360/// If any of these loads produces something for which `contains_nonascii`
368361/// (defined inside the function body) returns true, then we know the answer is false.
362+ #[ cfg( not( all( target_arch = "x86_64" , target_feature = "sse2" ) ) ) ]
369363#[ inline]
370364#[ rustc_allow_const_fn_unstable( const_eval_select) ] // fallback impl has same behavior
371365const fn is_ascii ( s : & [ u8 ] ) -> bool {
@@ -376,7 +370,14 @@ const fn is_ascii(s: &[u8]) -> bool {
376370 if const {
377371 is_ascii_simple( s)
378372 } else {
379- const USIZE_SIZE : usize = mem:: size_of:: <usize >( ) ;
373+ /// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
374+ /// from `../str/mod.rs`, which does something similar for utf8 validation.
375+ const fn contains_nonascii( v: usize ) -> bool {
376+ const NONASCII_MASK : usize = usize :: repeat_u8( 0x80 ) ;
377+ ( NONASCII_MASK & v) != 0
378+ }
379+
380+ const USIZE_SIZE : usize = size_of:: <usize >( ) ;
380381
381382 let len = s. len( ) ;
382383 let align_offset = s. as_ptr( ) . align_offset( USIZE_SIZE ) ;
@@ -386,7 +387,7 @@ const fn is_ascii(s: &[u8]) -> bool {
386387 //
387388 // We also do this for architectures where `size_of::<usize>()` isn't
388389 // sufficient alignment for `usize`, because it's a weird edge case.
389- if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem :: align_of:: <usize >( ) {
390+ if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < align_of:: <usize >( ) {
390391 return is_ascii_simple( s) ;
391392 }
392393
@@ -420,7 +421,7 @@ const fn is_ascii(s: &[u8]) -> bool {
420421 // have alignment information it should have given a `usize::MAX` for
421422 // `align_offset` earlier, sending things through the scalar path instead of
422423 // this one, so this check should pass if it's reachable.
423- debug_assert!( word_ptr. is_aligned_to( mem :: align_of:: <usize >( ) ) ) ;
424+ debug_assert!( word_ptr. is_aligned_to( align_of:: <usize >( ) ) ) ;
424425
425426 // Read subsequent words until the last aligned word, excluding the last
426427 // aligned word by itself to be done in tail check later, to ensure that
@@ -455,3 +456,48 @@ const fn is_ascii(s: &[u8]) -> bool {
455456 }
456457 )
457458}
459+
/// ASCII test optimized to use the `pmovmskb` instruction available on `x86-64`
/// platforms.
///
/// Other platforms are not likely to benefit from this code structure, so they
/// use SWAR techniques to test for ASCII in `usize`-sized chunks.
///
/// Returns `true` when every byte of `bytes` is ASCII (strictly less than
/// `0x80`). An empty slice is trivially ASCII and yields `true`.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
#[inline]
const fn is_ascii(bytes: &[u8]) -> bool {
    // Process chunks of 32 bytes at a time in the fast path to enable
    // auto-vectorization and use of `pmovmskb`. Two 128-bit vector registers
    // can be OR'd together and then the resulting vector can be tested for
    // non-ASCII bytes.
    const CHUNK_SIZE: usize = 32;

    let mut i = 0;

    while i + CHUNK_SIZE <= bytes.len() {
        let chunk_end = i + CHUNK_SIZE;

        // Get LLVM to produce a `pmovmskb` instruction on x86-64 which
        // creates a mask from the most significant bit of each byte.
        // ASCII bytes are less than 128 (0x80), so their most significant
        // bit is unset.
        //
        // The inner loop deliberately has no early exit: it counts the ASCII
        // bytes of the whole chunk and the result is checked once afterwards.
        let mut count = 0;
        while i < chunk_end {
            count += bytes[i].is_ascii() as u8;
            i += 1;
        }

        // All bytes should be <= 127 so count is equal to chunk size.
        // `CHUNK_SIZE` is 32, so neither the cast nor the `u8` counter can
        // overflow.
        if count != CHUNK_SIZE as u8 {
            return false;
        }
    }

    // Process the remaining `bytes.len() % CHUNK_SIZE` bytes. The flag is
    // accumulated with `&=` rather than returning early on the first
    // non-ASCII byte.
    let mut is_ascii = true;
    while i < bytes.len() {
        is_ascii &= bytes[i].is_ascii();
        i += 1;
    }

    is_ascii
}
0 commit comments