@@ -10,9 +10,10 @@ use crate::ops;
1010impl [ u8 ] {
1111 /// Checks if all bytes in this slice are within the ASCII range.
1212 #[ stable( feature = "ascii_methods_on_intrinsics" , since = "1.23.0" ) ]
13+ #[ rustc_const_unstable( feature = "const_slice_is_ascii" , issue = "111090" ) ]
1314 #[ must_use]
1415 #[ inline]
15- pub fn is_ascii ( & self ) -> bool {
16+ pub const fn is_ascii ( & self ) -> bool {
1617 is_ascii ( self ) // path call: this is the module-level `is_ascii(&[u8])` free function, not a recursive method call
1718 }
1819
@@ -21,7 +22,7 @@ impl [u8] {
2122 #[ unstable( feature = "ascii_char" , issue = "110998" ) ]
2223 #[ must_use]
2324 #[ inline]
24- pub fn as_ascii ( & self ) -> Option < & [ ascii:: Char ] > {
25+ pub const fn as_ascii ( & self ) -> Option < & [ ascii:: Char ] > {
2526 if self . is_ascii ( ) {
2627 // SAFETY: Just checked that it's ASCII
2728 Some ( unsafe { self . as_ascii_unchecked ( ) } )
@@ -262,11 +263,29 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
262263/// Returns `true` if any byte in the word `v` is non-ASCII (>= 128). Borrowed
263264/// from `../str/mod.rs`, which does something similar for UTF-8 validation.
264265#[ inline]
265- fn contains_nonascii ( v : usize ) -> bool {
266+ const fn contains_nonascii ( v : usize ) -> bool {
266267 const NONASCII_MASK : usize = usize:: repeat_u8 ( 0x80 ) ; // presumably 0x80 replicated into every byte of a `usize`; `repeat_u8` is defined elsewhere, confirm there
267268 ( NONASCII_MASK & v) != 0 // any bit surviving the mask means some byte of `v` has its high bit set
268269}
269270
271+ /// ASCII test *without* the chunk-at-a-time optimizations.
272+ ///
273+ /// This is carefully structured to produce nice small code -- it's smaller in
274+ /// `-O` than what the "obvious" ways produce under `-C opt-level=s`. If you
275+ /// touch it, be sure to run (and update if needed) the assembly test.
276+ #[ unstable( feature = "str_internals" , issue = "none" ) ]
277+ #[ doc( hidden) ]
278+ #[ inline]
279+ pub const fn is_ascii_simple ( mut bytes : & [ u8 ] ) -> bool {
280+ while let [ rest @ .., last] = bytes { // peel one byte off the back per iteration; the pattern stops matching once `bytes` is empty
281+ if !last. is_ascii ( ) {
282+ break ; // leave `bytes` non-empty so the final check yields `false`
283+ }
284+ bytes = rest;
285+ }
286+ bytes. is_empty ( ) // empty only if every byte passed the check (or the input was empty to begin with)
287+ }
288+
270289/// Optimized ASCII test that will use usize-at-a-time operations instead of
271290/// byte-at-a-time operations (when possible).
272291///
@@ -280,7 +299,7 @@ fn contains_nonascii(v: usize) -> bool {
280299/// If any of these loads produces something for which `contains_nonascii`
281300/// (above) returns true, then we know the answer is false.
282301#[ inline]
283- fn is_ascii ( s : & [ u8 ] ) -> bool {
302+ const fn is_ascii ( s : & [ u8 ] ) -> bool {
284303 const USIZE_SIZE : usize = mem:: size_of :: < usize > ( ) ;
285304
286305 let len = s. len ( ) ;
@@ -292,7 +311,7 @@ fn is_ascii(s: &[u8]) -> bool {
292311 // We also do this for architectures where `size_of::<usize>()` isn't
293312 // sufficient alignment for `usize`, because it's a weird edge case.
294313 if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem:: align_of :: < usize > ( ) {
295- return s . iter ( ) . all ( |b| b . is_ascii ( ) ) ;
314+ return is_ascii_simple ( s ) ;
296315 }
297316
298317 // We always read the first word unaligned, which means `align_offset` is
@@ -321,18 +340,26 @@ fn is_ascii(s: &[u8]) -> bool {
321340 // Paranoia check about alignment, since we're about to do a bunch of
322341 // unaligned loads. In practice this should be impossible barring a bug in
323342 // `align_offset` though.
324- debug_assert_eq ! ( word_ptr. addr( ) % mem:: align_of:: <usize >( ) , 0 ) ;
343+ // While this method is allowed to spuriously fail in CTFE, if it doesn't
344+ // have alignment information it should have given a `usize::MAX` for
345+ // `align_offset` earlier, sending things through the scalar path instead of
346+ // this one, so this check should pass if it's reachable.
347+ debug_assert ! ( word_ptr. is_aligned_to( mem:: align_of:: <usize >( ) ) ) ;
325348
326349 // Read subsequent words up to, but not including, the last aligned word;
327350 // that word is left for the tail check later, which ensures the tail is at
328351 // most one `usize` long and avoids an extra `byte_pos == len` branch.
329352 while byte_pos < len - USIZE_SIZE {
330- debug_assert ! (
331- // Sanity check that the read is in bounds
332- ( word_ptr. addr( ) + USIZE_SIZE ) <= start. addr( ) . wrapping_add( len) &&
333- // And that our assumptions about `byte_pos` hold.
334- ( word_ptr. addr( ) - start. addr( ) ) == byte_pos
335- ) ;
353+ // Sanity check that the read is in bounds
354+ debug_assert ! ( byte_pos + USIZE_SIZE <= len) ;
355+ // And that our assumptions about `byte_pos` hold.
356+ debug_assert ! ( matches!(
357+ word_ptr. cast:: <u8 >( ) . guaranteed_eq( start. wrapping_add( byte_pos) ) ,
358+ // These are from the same allocation, so will hopefully always be
359+ // known to match even in CTFE, but if it refuses to compare them
360+ // that's ok since it's just a debug check anyway.
361+ None | Some ( true ) ,
362+ ) ) ;
336363
337364 // SAFETY: We know `word_ptr` is properly aligned (because of
338365 // `align_offset`), and we know that we have enough bytes between `word_ptr` and the end
0 commit comments