From 4d2bab84d6a4d7acce886135d3be7468aae81a84 Mon Sep 17 00:00:00 2001 From: Karl Meakin Date: Sat, 18 Oct 2025 01:21:47 +0100 Subject: [PATCH 1/4] Precommit tests for `SliceIndex` method codegen Add a `codegen-llvm` test to check the number of `icmp` instrucitons generated for each `SliceIndex` method on the various range types. This will be updated in the next commit when `SliceIndex::get` is optimized for `RangeInclusive`. --- tests/codegen-llvm/slice-range-indexing.rs | 85 ++++++++++++++++++++++ tests/codegen-llvm/str-range-indexing.rs | 75 +++++++++++++++++++ 2 files changed, 160 insertions(+) create mode 100644 tests/codegen-llvm/slice-range-indexing.rs create mode 100644 tests/codegen-llvm/str-range-indexing.rs diff --git a/tests/codegen-llvm/slice-range-indexing.rs b/tests/codegen-llvm/slice-range-indexing.rs new file mode 100644 index 0000000000000..af8ff00bf6f80 --- /dev/null +++ b/tests/codegen-llvm/slice-range-indexing.rs @@ -0,0 +1,85 @@ +//@ compile-flags: -Copt-level=3 +//@ min-llvm-version: 21 + +#![crate_type = "lib"] + +use std::ops::{Range, RangeFrom, RangeInclusive, RangeTo, RangeToInclusive}; + +macro_rules! tests { + ($range_ty:ty, $get_func_name:ident, $index_func_name:ident) => { + #[no_mangle] + pub fn $get_func_name(slice: &[u32], range: $range_ty) -> Option<&[u32]> { + slice.get(range) + } + + #[no_mangle] + pub fn $index_func_name(slice: &[u32], range: $range_ty) -> &[u32] { + &slice[range] + } + }; +} + +// 2 comparisons required: (range.end < slice.len()) && (range.start <= range.end) +// CHECK-LABEL: @get_range +// CHECK-COUNT-2: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret + +// 2 comparisons required: (range.end < slice.len()) && (range.start <= range.end) +// CHECK-LABEL: @index_range +// CHECK-COUNT-2: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret +tests!(Range, get_range, index_range); + +// 1 comparison required: (range.end < slice.len()) +// CHECK-LABEL: @get_range_to +// CHECK-COUNT-1: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret + +// 1 comparison required: (range.end < slice.len()) +// CHECK-LABEL: @index_range_to +// CHECK-COUNT-1: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret +tests!(RangeTo, get_range_to, index_range_to); + +// 1 comparison required: (range.start <= slice.len()) +// CHECK-LABEL: @get_range_from +// CHECK-COUNT-1: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret + +// 1 comparison required: (range.start <= slice.len()) +// CHECK-LABEL: @index_range_from +// CHECK-COUNT-1: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret +tests!(RangeFrom, get_range_from, index_range_from); + +// 3 comparisons required: (range.end != usize::MAX) && (range.end < slice.len()) && (range.start <= range.end + 1) +// CHECK-LABEL: @get_range_inclusive +// CHECK-COUNT-3: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret + +// 2 comparisons required: (range.end < slice.len()) && (range.start <= range.end + 1) +// CHECK-LABEL: @index_range_inclusive +// CHECK-COUNT-2: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret +tests!(RangeInclusive, get_range_inclusive, index_range_inclusive); + +// 2 comparisons required: (range.end != usize::MAX) && (range.end < slice.len()) +// CHECK-LABEL: @get_range_to_inclusive +// CHECK-COUNT-2: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret + +// 1 comparison required: (range.end < slice.len()) +// CHECK-LABEL: @index_range_to_inclusive +// CHECK-COUNT-1: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret +tests!(RangeToInclusive, get_range_to_inclusive, index_range_to_inclusive); diff --git a/tests/codegen-llvm/str-range-indexing.rs b/tests/codegen-llvm/str-range-indexing.rs new file mode 100644 index 0000000000000..fed5b65267d83 --- /dev/null +++ b/tests/codegen-llvm/str-range-indexing.rs @@ -0,0 +1,75 @@ +//@ compile-flags: -Copt-level=3 +//@ min-llvm-version: 21 + +#![crate_type = "lib"] + +use std::ops::{Range, RangeFrom, RangeInclusive, RangeTo, RangeToInclusive}; + +macro_rules! tests { + ($range_ty:ty, $get_func_name:ident, $index_func_name:ident) => { + #[no_mangle] + pub fn $get_func_name(slice: &str, range: $range_ty) -> Option<&str> { + slice.get(range) + } + + #[no_mangle] + pub fn $index_func_name(slice: &str, range: $range_ty) -> &str { + &slice[range] + } + }; +} + +// CHECK-LABEL: @get_range +// CHECK-COUNT-9: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret + +// CHECK-LABEL: @index_range +// CHECK-COUNT-9: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret +tests!(Range, get_range, index_range); + +// CHECK-LABEL: @get_range_to +// CHECK-COUNT-4: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret + +// CHECK-LABEL: @index_range_to +// CHECK-COUNT-4: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret +tests!(RangeTo, get_range_to, index_range_to); + +// CHECK-LABEL: @get_range_from +// CHECK-COUNT-4: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret + +// CHECK-LABEL: @index_range_from +// CHECK-COUNT-4: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret +tests!(RangeFrom, get_range_from, index_range_from); + +// CHECK-LABEL: @get_range_inclusive +// CHECK-COUNT-9: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret + +// CHECK-LABEL: @index_range_inclusive +// CHECK-COUNT-9: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret +tests!(RangeInclusive, get_range_inclusive, index_range_inclusive); + +// CHECK-LABEL: @get_range_to_inclusive +// CHECK-COUNT-4: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret + +// CHECK-LABEL: @index_range_to_inclusive +// CHECK-COUNT-4: %{{.+}} = icmp +// CHECK-NOT: %{{.+}} = icmp +// CHECK: ret +tests!(RangeToInclusive, get_range_to_inclusive, index_range_to_inclusive); From ed051ecefbac2855572fcdef6c6a7bbe5f04a117 Mon Sep 17 00:00:00 2001 From: Karl Meakin Date: Fri, 25 Jul 2025 19:51:21 +0100 Subject: [PATCH 2/4] Optimize `SliceIndex::get` impl for `RangeInclusive` The check for `self.end() == usize::MAX` can be combined with the `self.end() + 1 > slice.len()` check into `self.end() >= slice.len()`, since `self.end() < slice.len()` implies both `self.end() <= slice.len()` and `self.end() < usize::MAX`. --- library/core/src/slice/index.rs | 9 ++++----- tests/codegen-llvm/slice-range-indexing.rs | 8 ++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/library/core/src/slice/index.rs b/library/core/src/slice/index.rs index a9806060d3d81..a07ab0bbb4084 100644 --- a/library/core/src/slice/index.rs +++ b/library/core/src/slice/index.rs @@ -658,7 +658,6 @@ unsafe impl const SliceIndex<[T]> for ops::RangeFull { } /// The methods `index` and `index_mut` panic if: -/// - the end of the range is `usize::MAX` or /// - the start of the range is greater than the end of the range or /// - the end of the range is out of bounds. #[stable(feature = "inclusive_range", since = "1.26.0")] @@ -668,12 +667,12 @@ unsafe impl const SliceIndex<[T]> for ops::RangeInclusive { #[inline] fn get(self, slice: &[T]) -> Option<&[T]> { - if *self.end() == usize::MAX { None } else { self.into_slice_range().get(slice) } + if *self.end() >= slice.len() { None } else { self.into_slice_range().get(slice) } } #[inline] fn get_mut(self, slice: &mut [T]) -> Option<&mut [T]> { - if *self.end() == usize::MAX { None } else { self.into_slice_range().get_mut(slice) } + if *self.end() >= slice.len() { None } else { self.into_slice_range().get_mut(slice) } } #[inline] @@ -697,7 +696,7 @@ unsafe impl const SliceIndex<[T]> for ops::RangeInclusive { start = if exhausted { end } else { start }; if let Some(new_len) = usize::checked_sub(end, start) { // SAFETY: `self` is checked to be valid and in bounds above. - unsafe { return &*get_offset_len_noubcheck(slice, start, new_len) } + unsafe { return &*get_offset_len_noubcheck(slice, start, new_len) }; } } slice_index_fail(start, end, slice.len()) @@ -712,7 +711,7 @@ unsafe impl const SliceIndex<[T]> for ops::RangeInclusive { start = if exhausted { end } else { start }; if let Some(new_len) = usize::checked_sub(end, start) { // SAFETY: `self` is checked to be valid and in bounds above. - unsafe { return &mut *get_offset_len_mut_noubcheck(slice, start, new_len) } + unsafe { return &mut *get_offset_len_mut_noubcheck(slice, start, new_len) }; } } slice_index_fail(start, end, slice.len()) diff --git a/tests/codegen-llvm/slice-range-indexing.rs b/tests/codegen-llvm/slice-range-indexing.rs index af8ff00bf6f80..e1f044f5ce881 100644 --- a/tests/codegen-llvm/slice-range-indexing.rs +++ b/tests/codegen-llvm/slice-range-indexing.rs @@ -58,9 +58,9 @@ tests!(RangeTo, get_range_to, index_range_to); // CHECK: ret tests!(RangeFrom, get_range_from, index_range_from); -// 3 comparisons required: (range.end != usize::MAX) && (range.end < slice.len()) && (range.start <= range.end + 1) +// 2 comparisons required: (range.end < slice.len()) && (range.start <= range.end + 1) // CHECK-LABEL: @get_range_inclusive -// CHECK-COUNT-3: %{{.+}} = icmp +// CHECK-COUNT-2: %{{.+}} = icmp // CHECK-NOT: %{{.+}} = icmp // CHECK: ret @@ -71,9 +71,9 @@ tests!(RangeFrom, get_range_from, index_range_from); // CHECK: ret tests!(RangeInclusive, get_range_inclusive, index_range_inclusive); -// 2 comparisons required: (range.end != usize::MAX) && (range.end < slice.len()) +// 1 comparison required: (range.end < slice.len()) // CHECK-LABEL: @get_range_to_inclusive -// CHECK-COUNT-2: %{{.+}} = icmp +// CHECK-COUNT-1: %{{.+}} = icmp // CHECK-NOT: %{{.+}} = icmp // CHECK: ret From d1eb5a5e0989c5e9b99874990c58df5c6d1d480c Mon Sep 17 00:00:00 2001 From: Karl Meakin Date: Fri, 25 Jul 2025 20:14:26 +0100 Subject: [PATCH 3/4] Optimize `std::slice::range` Same reasoning as previous commit. --- library/core/src/slice/index.rs | 57 +++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/library/core/src/slice/index.rs b/library/core/src/slice/index.rs index a07ab0bbb4084..bab016b73d9e0 100644 --- a/library/core/src/slice/index.rs +++ b/library/core/src/slice/index.rs @@ -911,6 +911,7 @@ where ops::Bound::Excluded(&end) if end > len => slice_index_fail(0, end, len), ops::Bound::Excluded(&end) => end, + ops::Bound::Unbounded => len, }; @@ -966,19 +967,29 @@ where { let len = bounds.end; - let start = match range.start_bound() { - ops::Bound::Included(&start) => start, - ops::Bound::Excluded(start) => start.checked_add(1)?, - ops::Bound::Unbounded => 0, - }; - let end = match range.end_bound() { - ops::Bound::Included(end) => end.checked_add(1)?, + ops::Bound::Included(&end) if end >= len => return None, + // Cannot overflow because `end < len` implies `end < usize::MAX`. + ops::Bound::Included(end) => end + 1, + + ops::Bound::Excluded(&end) if end > len => return None, ops::Bound::Excluded(&end) => end, + ops::Bound::Unbounded => len, }; - if start > end || end > len { None } else { Some(ops::Range { start, end }) } + let start = match range.start_bound() { + ops::Bound::Excluded(&start) if start >= end => return None, + // Cannot overflow because `start < end` implies `start < usize::MAX`. + ops::Bound::Excluded(&start) => start + 1, + + ops::Bound::Included(&start) if start > end => return None, + ops::Bound::Included(&start) => start, + + ops::Bound::Unbounded => 0, + }; + + Some(ops::Range { start, end }) } /// Converts a pair of `ops::Bound`s into `ops::Range` without performing any @@ -1007,21 +1018,27 @@ pub(crate) fn into_range( len: usize, (start, end): (ops::Bound, ops::Bound), ) -> Option> { - use ops::Bound; - let start = match start { - Bound::Included(start) => start, - Bound::Excluded(start) => start.checked_add(1)?, - Bound::Unbounded => 0, - }; - let end = match end { - Bound::Included(end) => end.checked_add(1)?, - Bound::Excluded(end) => end, - Bound::Unbounded => len, + ops::Bound::Included(end) if end >= len => return None, + // Cannot overflow because `end < len` implies `end < usize::MAX`. + ops::Bound::Included(end) => end + 1, + + ops::Bound::Excluded(end) if end > len => return None, + ops::Bound::Excluded(end) => end, + + ops::Bound::Unbounded => len, }; - // Don't bother with checking `start < end` and `end <= len` - // since these checks are handled by `Range` impls + let start = match start { + ops::Bound::Excluded(start) if start >= end => return None, + // Cannot overflow because `start < end` implies `start < usize::MAX`. + ops::Bound::Excluded(start) => start + 1, + + ops::Bound::Included(start) if start > end => return None, + ops::Bound::Included(start) => start, + + ops::Bound::Unbounded => 0, + }; Some(start..end) } From ab337480ca180bf418903dd68b9ef14df6d2b1ad Mon Sep 17 00:00:00 2001 From: Karl Meakin Date: Fri, 25 Jul 2025 21:57:44 +0100 Subject: [PATCH 4/4] Optimize `SliceIndex` for `RangeInclusive` Same reasoning as previous two commits. --- library/alloctests/tests/str.rs | 4 +- library/core/src/range.rs | 9 ---- library/core/src/str/mod.rs | 2 +- library/core/src/str/traits.rs | 61 +++++++++++++----------- tests/codegen-llvm/str-range-indexing.rs | 8 ++-- 5 files changed, 41 insertions(+), 43 deletions(-) diff --git a/library/alloctests/tests/str.rs b/library/alloctests/tests/str.rs index 906fa2d425e77..d674937eb8524 100644 --- a/library/alloctests/tests/str.rs +++ b/library/alloctests/tests/str.rs @@ -631,13 +631,13 @@ mod slice_index { // note: using 0 specifically ensures that the result of overflowing is 0..0, // so that `get` doesn't simply return None for the wrong reason. bad: data[0..=usize::MAX]; - message: "maximum usize"; + message: "out of bounds"; } in mod rangetoinclusive { data: "hello"; bad: data[..=usize::MAX]; - message: "maximum usize"; + message: "out of bounds"; } } } diff --git a/library/core/src/range.rs b/library/core/src/range.rs index a096a8ceafc87..d1f4a1cf9ecb0 100644 --- a/library/core/src/range.rs +++ b/library/core/src/range.rs @@ -332,15 +332,6 @@ impl RangeInclusive { } } -impl RangeInclusive { - /// Converts to an exclusive `Range` for `SliceIndex` implementations. - /// The caller is responsible for dealing with `last == usize::MAX`. - #[inline] - pub(crate) const fn into_slice_range(self) -> Range { - Range { start: self.start, end: self.last + 1 } - } -} - #[unstable(feature = "new_range_api", issue = "125687")] impl RangeBounds for RangeInclusive { fn start_bound(&self) -> Bound<&T> { diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 82019b9b3afe5..c746698756723 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -87,7 +87,7 @@ fn slice_error_fail_rt(s: &str, begin: usize, end: usize) -> ! { let ellipsis = if trunc_len < s.len() { "[...]" } else { "" }; // 1. out of bounds - if begin > s.len() || end > s.len() { + if begin > s.len() || end > s.len() || (end == s.len() && s.is_char_boundary(begin)) { let oob_index = if begin > s.len() { begin } else { end }; panic!("byte index {oob_index} is out of bounds of `{s_trunc}`{ellipsis}"); } diff --git a/library/core/src/str/traits.rs b/library/core/src/str/traits.rs index a7cc943994c53..61036fbff4392 100644 --- a/library/core/src/str/traits.rs +++ b/library/core/src/str/traits.rs @@ -76,13 +76,6 @@ where } } -#[inline(never)] -#[cold] -#[track_caller] -const fn str_index_overflow_fail() -> ! { - panic!("attempted to index str up to maximum usize"); -} - /// Implements substring slicing with syntax `&self[..]` or `&mut self[..]`. /// /// Returns a slice of the whole string, i.e., returns `&self` or `&mut @@ -640,11 +633,11 @@ unsafe impl const SliceIndex for ops::RangeInclusive { type Output = str; #[inline] fn get(self, slice: &str) -> Option<&Self::Output> { - if *self.end() == usize::MAX { None } else { self.into_slice_range().get(slice) } + if *self.end() >= slice.len() { None } else { self.into_slice_range().get(slice) } } #[inline] fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> { - if *self.end() == usize::MAX { None } else { self.into_slice_range().get_mut(slice) } + if *self.end() >= slice.len() { None } else { self.into_slice_range().get_mut(slice) } } #[inline] unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output { @@ -658,17 +651,37 @@ unsafe impl const SliceIndex for ops::RangeInclusive { } #[inline] fn index(self, slice: &str) -> &Self::Output { - if *self.end() == usize::MAX { - str_index_overflow_fail(); + let Self { mut start, mut end, exhausted } = self; + let len = slice.len(); + if end < len { + end = end + 1; + start = if exhausted { end } else { start }; + if start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end) { + // SAFETY: just checked that `start` and `end` are on a char boundary, + // and we are passing in a safe reference, so the return value will also be one. + // We also checked char boundaries, so this is valid UTF-8. + unsafe { return &*(start..end).get_unchecked(slice) }; + } } - self.into_slice_range().index(slice) + + super::slice_error_fail(slice, start, end); } #[inline] fn index_mut(self, slice: &mut str) -> &mut Self::Output { - if *self.end() == usize::MAX { - str_index_overflow_fail(); + let Self { mut start, mut end, exhausted } = self; + let len = slice.len(); + if end < len { + end = end + 1; + start = if exhausted { end } else { start }; + if start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end) { + // SAFETY: just checked that `start` and `end` are on a char boundary, + // and we are passing in a safe reference, so the return value will also be one. + // We also checked char boundaries, so this is valid UTF-8. + unsafe { return &mut *(start..end).get_unchecked_mut(slice) }; + } } - self.into_slice_range().index_mut(slice) + + super::slice_error_fail(slice, start, end); } } @@ -678,35 +691,29 @@ unsafe impl const SliceIndex for range::RangeInclusive { type Output = str; #[inline] fn get(self, slice: &str) -> Option<&Self::Output> { - if self.last == usize::MAX { None } else { self.into_slice_range().get(slice) } + ops::RangeInclusive::from(self).get(slice) } #[inline] fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> { - if self.last == usize::MAX { None } else { self.into_slice_range().get_mut(slice) } + ops::RangeInclusive::from(self).get_mut(slice) } #[inline] unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output { // SAFETY: the caller must uphold the safety contract for `get_unchecked`. - unsafe { self.into_slice_range().get_unchecked(slice) } + unsafe { ops::RangeInclusive::from(self).get_unchecked(slice) } } #[inline] unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output { // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`. - unsafe { self.into_slice_range().get_unchecked_mut(slice) } + unsafe { ops::RangeInclusive::from(self).get_unchecked_mut(slice) } } #[inline] fn index(self, slice: &str) -> &Self::Output { - if self.last == usize::MAX { - str_index_overflow_fail(); - } - self.into_slice_range().index(slice) + ops::RangeInclusive::from(self).index(slice) } #[inline] fn index_mut(self, slice: &mut str) -> &mut Self::Output { - if self.last == usize::MAX { - str_index_overflow_fail(); - } - self.into_slice_range().index_mut(slice) + ops::RangeInclusive::from(self).index_mut(slice) } } diff --git a/tests/codegen-llvm/str-range-indexing.rs b/tests/codegen-llvm/str-range-indexing.rs index fed5b65267d83..4cbcd70980168 100644 --- a/tests/codegen-llvm/str-range-indexing.rs +++ b/tests/codegen-llvm/str-range-indexing.rs @@ -53,23 +53,23 @@ tests!(RangeTo, get_range_to, index_range_to); tests!(RangeFrom, get_range_from, index_range_from); // CHECK-LABEL: @get_range_inclusive -// CHECK-COUNT-9: %{{.+}} = icmp +// CHECK-COUNT-7: %{{.+}} = icmp // CHECK-NOT: %{{.+}} = icmp // CHECK: ret // CHECK-LABEL: @index_range_inclusive -// CHECK-COUNT-9: %{{.+}} = icmp +// CHECK-COUNT-7: %{{.+}} = icmp // CHECK-NOT: %{{.+}} = icmp // CHECK: ret tests!(RangeInclusive, get_range_inclusive, index_range_inclusive); // CHECK-LABEL: @get_range_to_inclusive -// CHECK-COUNT-4: %{{.+}} = icmp +// CHECK-COUNT-3: %{{.+}} = icmp // CHECK-NOT: %{{.+}} = icmp // CHECK: ret // CHECK-LABEL: @index_range_to_inclusive -// CHECK-COUNT-4: %{{.+}} = icmp +// CHECK-COUNT-3: %{{.+}} = icmp // CHECK-NOT: %{{.+}} = icmp // CHECK: ret tests!(RangeToInclusive, get_range_to_inclusive, index_range_to_inclusive);