@@ -76,15 +76,14 @@ use iter::MatchIndicesInternal;
7676use iter:: SplitInternal ;
7777use iter:: { MatchesInternal , SplitNInternal } ;
7878
79- use validations:: truncate_to_char_boundary;
80-
8179#[ inline( never) ]
8280#[ cold]
8381#[ track_caller]
8482fn slice_error_fail ( s : & str , begin : usize , end : usize ) -> ! {
8583 const MAX_DISPLAY_LENGTH : usize = 256 ;
86- let ( truncated, s_trunc) = truncate_to_char_boundary ( s, MAX_DISPLAY_LENGTH ) ;
87- let ellipsis = if truncated { "[...]" } else { "" } ;
84+ let trunc_len = s. floor_char_boundary ( MAX_DISPLAY_LENGTH ) ;
85+ let s_trunc = & s[ ..trunc_len] ;
86+ let ellipsis = if trunc_len < s. len ( ) { "[...]" } else { "" } ;
8887
8988 // 1. out of bounds
9089 if begin > s. len ( ) || end > s. len ( ) {
@@ -105,10 +104,7 @@ fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
105104 // 3. character boundary
106105 let index = if !s. is_char_boundary ( begin) { begin } else { end } ;
107106 // find the character
108- let mut char_start = index;
109- while !s. is_char_boundary ( char_start) {
110- char_start -= 1 ;
111- }
107+ let char_start = s. floor_char_boundary ( index) ;
112108 // `char_start` must be less than len and a char boundary
113109 let ch = s[ char_start..] . chars ( ) . next ( ) . unwrap ( ) ;
114110 let char_range = char_start..char_start + ch. len_utf8 ( ) ;
@@ -215,8 +211,80 @@ impl str {
215211 // code on higher opt-levels. See PR #84751 for more details.
216212 None => index == self . len ( ) ,
217213
218- // This is bit magic equivalent to: b < 128 || b >= 192
219- Some ( & b) => ( b as i8 ) >= -0x40 ,
214+ Some ( & b) => b. is_utf8_char_boundary ( ) ,
215+ }
216+ }
217+
218+ /// Finds the closest `x` not exceeding `index` where `is_char_boundary(x)` is `true`.
219+ ///
220+ /// This method can help you truncate a string so that it's still valid UTF-8, but doesn't
221+ /// exceed a given number of bytes. Note that this is done purely at the character level
222+ /// and can still visually split graphemes, even though the underlying characters aren't
223+ /// split. For example, the emoji 🧑🔬 (scientist) could be split so that the string only
224+ /// includes 🧑 (person) instead.
225+ ///
226+ /// # Examples
227+ ///
228+ /// ```
229+ /// #![feature(round_char_boundary)]
230+ /// let s = "❤️🧡💛💚💙💜";
231+ /// assert_eq!(s.len(), 26);
232+ /// assert!(!s.is_char_boundary(13));
233+ ///
234+ /// let closest = s.floor_char_boundary(13);
235+ /// assert_eq!(closest, 10);
236+ /// assert_eq!(&s[..closest], "❤️🧡");
237+ /// ```
238+ #[ unstable( feature = "round_char_boundary" , issue = "93743" ) ]
239+ #[ inline]
240+ pub fn floor_char_boundary ( & self , index : usize ) -> usize {
241+ if index >= self . len ( ) {
242+ self . len ( )
243+ } else {
244+ let lower_bound = index. saturating_sub ( 3 ) ;
245+ let new_index = self . as_bytes ( ) [ lower_bound..=index]
246+ . iter ( )
247+ . rposition ( |b| b. is_utf8_char_boundary ( ) ) ;
248+
249+ // SAFETY: we know that the character boundary will be within four bytes
250+ unsafe { lower_bound + new_index. unwrap_unchecked ( ) }
251+ }
252+ }
253+
254+ /// Finds the closest `x` not below `index` where `is_char_boundary(x)` is `true`.
255+ ///
256+ /// This method is the natural complement to [`floor_char_boundary`]. See that method
257+ /// for more details.
258+ ///
259+ /// [`floor_char_boundary`]: str::floor_char_boundary
260+ ///
261+ /// # Panics
262+ ///
263+ /// Panics if `index > self.len()`.
264+ ///
265+ /// # Examples
266+ ///
267+ /// ```
268+ /// #![feature(round_char_boundary)]
269+ /// let s = "❤️🧡💛💚💙💜";
270+ /// assert_eq!(s.len(), 26);
271+ /// assert!(!s.is_char_boundary(13));
272+ ///
273+ /// let closest = s.ceil_char_boundary(13);
274+ /// assert_eq!(closest, 14);
275+ /// assert_eq!(&s[..closest], "❤️🧡💛");
276+ /// ```
277+ #[ unstable( feature = "round_char_boundary" , issue = "93743" ) ]
278+ #[ inline]
279+ pub fn ceil_char_boundary ( & self , index : usize ) -> usize {
280+ if index > self . len ( ) {
281+ slice_error_fail ( self , index, index)
282+ } else {
283+ let upper_bound = Ord :: min ( index + 4 , self . len ( ) ) ;
284+ self . as_bytes ( ) [ index..upper_bound]
285+ . iter ( )
286+ . position ( |b| b. is_utf8_char_boundary ( ) )
287+ . map_or ( upper_bound, |pos| pos + index)
220288 }
221289 }
222290
0 commit comments