@@ -39,6 +39,7 @@ use crate::result::Result::{Err, Ok};
3939/// Pure rust memchr implementation, taken from rust-memchr
4040pub mod memchr;
4141
42+ mod ascii;
4243mod cmp;
4344mod index;
4445mod iter;
@@ -3197,163 +3198,6 @@ impl<T> [T] {
31973198 }
31983199}
31993200
3200- #[ lang = "slice_u8" ]
3201- #[ cfg( not( test) ) ]
3202- impl [ u8 ] {
3203- /// Checks if all bytes in this slice are within the ASCII range.
3204- #[ stable( feature = "ascii_methods_on_intrinsics" , since = "1.23.0" ) ]
3205- #[ inline]
3206- pub fn is_ascii ( & self ) -> bool {
3207- is_ascii ( self )
3208- }
3209-
3210- /// Checks that two slices are an ASCII case-insensitive match.
3211- ///
3212- /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
3213- /// but without allocating and copying temporaries.
3214- #[ stable( feature = "ascii_methods_on_intrinsics" , since = "1.23.0" ) ]
3215- #[ inline]
3216- pub fn eq_ignore_ascii_case ( & self , other : & [ u8 ] ) -> bool {
3217- self . len ( ) == other. len ( ) && self . iter ( ) . zip ( other) . all ( |( a, b) | a. eq_ignore_ascii_case ( b) )
3218- }
3219-
3220- /// Converts this slice to its ASCII upper case equivalent in-place.
3221- ///
3222- /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
3223- /// but non-ASCII letters are unchanged.
3224- ///
3225- /// To return a new uppercased value without modifying the existing one, use
3226- /// [`to_ascii_uppercase`].
3227- ///
3228- /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
3229- #[ stable( feature = "ascii_methods_on_intrinsics" , since = "1.23.0" ) ]
3230- #[ inline]
3231- pub fn make_ascii_uppercase ( & mut self ) {
3232- for byte in self {
3233- byte. make_ascii_uppercase ( ) ;
3234- }
3235- }
3236-
3237- /// Converts this slice to its ASCII lower case equivalent in-place.
3238- ///
3239- /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
3240- /// but non-ASCII letters are unchanged.
3241- ///
3242- /// To return a new lowercased value without modifying the existing one, use
3243- /// [`to_ascii_lowercase`].
3244- ///
3245- /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
3246- #[ stable( feature = "ascii_methods_on_intrinsics" , since = "1.23.0" ) ]
3247- #[ inline]
3248- pub fn make_ascii_lowercase ( & mut self ) {
3249- for byte in self {
3250- byte. make_ascii_lowercase ( ) ;
3251- }
3252- }
3253- }
3254-
/// Reports whether at least one byte packed into the word `v` is non-ASCII,
/// i.e. has its top bit set (value >= 128). The same trick is used by the
/// UTF-8 validation code in `../str/mod.rs`.
#[inline]
fn contains_nonascii(v: usize) -> bool {
    // One `0x80` per byte lane; the cast truncates the pattern on targets
    // whose `usize` is narrower than 64 bits.
    const HIGH_BITS: usize = 0x8080_8080_8080_8080_u64 as usize;
    let flagged = v & HIGH_BITS;
    flagged != 0
}
3262-
3263- /// Optimized ASCII test that will use usize-at-a-time operations instead of
3264- /// byte-at-a-time operations (when possible).
3265- ///
3266- /// The algorithm we use here is pretty simple. If `s` is too short, we just
3267- /// check each byte and be done with it. Otherwise:
3268- ///
3269- /// - Read the first word with an unaligned load.
3270- /// - Align the pointer, read subsequent words until end with aligned loads.
3271- /// - Read the last `usize` from `s` with an unaligned load.
3272- ///
3273- /// If any of these loads produces something for which `contains_nonascii`
3274- /// (above) returns true, then we know the answer is false.
3275- #[ inline]
3276- fn is_ascii ( s : & [ u8 ] ) -> bool {
3277- const USIZE_SIZE : usize = mem:: size_of :: < usize > ( ) ;
3278-
3279- let len = s. len ( ) ;
3280- let align_offset = s. as_ptr ( ) . align_offset ( USIZE_SIZE ) ;
3281-
3282- // If we wouldn't gain anything from the word-at-a-time implementation, fall
3283- // back to a scalar loop.
3284- //
3285- // We also do this for architectures where `size_of::<usize>()` isn't
3286- // sufficient alignment for `usize`, because it's a weird edge case.
3287- if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem:: align_of :: < usize > ( ) {
3288- return s. iter ( ) . all ( |b| b. is_ascii ( ) ) ;
3289- }
3290-
3291- // We always read the first word unaligned, which means `align_offset` is
3292- // 0, we'd read the same value again for the aligned read.
3293- let offset_to_aligned = if align_offset == 0 { USIZE_SIZE } else { align_offset } ;
3294-
3295- let start = s. as_ptr ( ) ;
3296- // SAFETY: We verify `len < USIZE_SIZE` above.
3297- let first_word = unsafe { ( start as * const usize ) . read_unaligned ( ) } ;
3298-
3299- if contains_nonascii ( first_word) {
3300- return false ;
3301- }
3302- // We checked this above, somewhat implicitly. Note that `offset_to_aligned`
3303- // is either `align_offset` or `USIZE_SIZE`, both of are explicitly checked
3304- // above.
3305- debug_assert ! ( offset_to_aligned <= len) ;
3306-
3307- // SAFETY: word_ptr is the (properly aligned) usize ptr we use to read the
3308- // middle chunk of the slice.
3309- let mut word_ptr = unsafe { start. add ( offset_to_aligned) as * const usize } ;
3310-
3311- // `byte_pos` is the byte index of `word_ptr`, used for loop end checks.
3312- let mut byte_pos = offset_to_aligned;
3313-
3314- // Paranoia check about alignment, since we're about to do a bunch of
3315- // unaligned loads. In practice this should be impossible barring a bug in
3316- // `align_offset` though.
3317- debug_assert_eq ! ( ( word_ptr as usize ) % mem:: align_of:: <usize >( ) , 0 ) ;
3318-
3319- // Read subsequent words until the last aligned word, excluding the last
3320- // aligned word by itself to be done in tail check later, to ensure that
3321- // tail is always one `usize` at most to extra branch `byte_pos == len`.
3322- while byte_pos < len - USIZE_SIZE {
3323- debug_assert ! (
3324- // Sanity check that the read is in bounds
3325- ( word_ptr as usize + USIZE_SIZE ) <= ( start. wrapping_add( len) as usize ) &&
3326- // And that our assumptions about `byte_pos` hold.
3327- ( word_ptr as usize ) - ( start as usize ) == byte_pos
3328- ) ;
3329-
3330- // SAFETY: We know `word_ptr` is properly aligned (because of
3331- // `align_offset`), and we know that we have enough bytes between `word_ptr` and the end
3332- let word = unsafe { word_ptr. read ( ) } ;
3333- if contains_nonascii ( word) {
3334- return false ;
3335- }
3336-
3337- byte_pos += USIZE_SIZE ;
3338- // SAFETY: We know that `byte_pos <= len - USIZE_SIZE`, which means that
3339- // after this `add`, `word_ptr` will be at most one-past-the-end.
3340- word_ptr = unsafe { word_ptr. add ( 1 ) } ;
3341- }
3342-
3343- // Sanity check to ensure there really is only one `usize` left. This should
3344- // be guaranteed by our loop condition.
3345- debug_assert ! ( byte_pos <= len && len - byte_pos <= USIZE_SIZE ) ;
3346-
3347- // SAFETY: This relies on `len >= USIZE_SIZE`, which we check at the start.
3348- let last_word = unsafe { ( start. add ( len - USIZE_SIZE ) as * const usize ) . read_unaligned ( ) } ;
3349-
3350- !contains_nonascii ( last_word)
3351- }
3352-
3353- ////////////////////////////////////////////////////////////////////////////////
3354- // Common traits
3355- ////////////////////////////////////////////////////////////////////////////////
3356-
33573201#[ stable( feature = "rust1" , since = "1.0.0" ) ]
33583202impl < T > Default for & [ T ] {
33593203 /// Creates an empty slice.
0 commit comments