@@ -613,22 +613,23 @@ pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> {
613613#[ cfg( not( test) ) ]
614614#[ cfg( not( no_global_oom_handling) ) ]
615615fn convert_while_ascii ( s : & str , convert : fn ( & u8 ) -> u8 ) -> ( String , & str ) {
616- // Process the input in chunks to enable auto-vectorization
617- const USIZE_SIZE : usize = mem:: size_of :: < usize > ( ) ;
618- const MAGIC_UNROLL : usize = 2 ;
619- const N : usize = USIZE_SIZE * MAGIC_UNROLL ;
616+ // Process the input in chunks of 16 bytes to enable auto-vectorization.
617+ // Previously the chunk size depended on the size of `usize`,
618+ // but on 32-bit platforms with sse or neon, 16 bytes is also the better choice.
619+ // The only downside on other platforms would be a bit more loop-unrolling.
620+ const N : usize = 16 ;
620621
621622 let mut slice = s. as_bytes ( ) ;
622623 let mut out = Vec :: with_capacity ( slice. len ( ) ) ;
623- let mut out_slice = & mut out. spare_capacity_mut ( ) [ ..slice . len ( ) ] ;
624+ let mut out_slice = out. spare_capacity_mut ( ) ;
624625
625626 let mut ascii_prefix_len = 0_usize ;
626627 let mut is_ascii = [ false ; N ] ;
627628
628629 while slice. len ( ) >= N {
629630 // Safety: checked in loop condition
630631 let chunk = unsafe { slice. get_unchecked ( ..N ) } ;
631- // Safety: out_slice has same length as input slice and gets sliced with the same offsets
632+ // Safety: out_slice has at least the same length as the input slice and gets sliced with the same offsets
632633 let out_chunk = unsafe { out_slice. get_unchecked_mut ( ..N ) } ;
633634
634635 for j in 0 ..N {
@@ -639,6 +640,7 @@ fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
639640 // size gives the best result, specifically a pmovmsk instruction on x86.
640641 // There is a codegen test in `issue-123712-str-to-lower-autovectorization.rs` which should
641642 // be updated when this method is changed.
643+ // See also https://github.com/llvm/llvm-project/issues/96395
642644 if is_ascii. iter ( ) . map ( |x| * x as u8 ) . sum :: < u8 > ( ) as usize != N {
643645 break ;
644646 }
0 commit comments