@@ -6,6 +6,46 @@ use crate::iter::FusedIterator;
66use super :: from_utf8_unchecked;
77use super :: validations:: utf8_char_width;
88
9+ impl [ u8 ] {
10+ /// Creates an iterator over the contiguous valid UTF-8 ranges of this
11+ /// slice, and the non-UTF-8 fragments in between.
12+ ///
13+ /// # Examples
14+ ///
15+ /// This function formats arbitrary but mostly-UTF-8 bytes into Rust source
16+ /// code in the form of a C-string literal (`c"..."`).
17+ ///
18+ /// ```
19+ /// use std::fmt::Write as _;
20+ ///
21+ /// pub fn cstr_literal(bytes: &[u8]) -> String {
22+ /// let mut repr = String::new();
23+ /// repr.push_str("c\"");
24+ /// for chunk in bytes.utf8_chunks() {
25+ /// for ch in chunk.valid().chars() {
26+ /// // Escapes \0, \t, \r, \n, \\, \', \", and uses \u{...} for non-printable characters.
27+ /// write!(repr, "{}", ch.escape_debug()).unwrap();
28+ /// }
29+ /// for byte in chunk.invalid() {
30+ /// write!(repr, "\\x{:02X}", byte).unwrap();
31+ /// }
32+ /// }
33+ /// repr.push('"');
34+ /// repr
35+ /// }
36+ ///
37+ /// fn main() {
38+ /// let lit = cstr_literal(b"\xferris the \xf0\x9f\xa6\x80\x07");
39+ /// let expected = stringify!(c"\xFErris the 🦀\u{7}");
40+ /// assert_eq!(lit, expected);
41+ /// }
42+ /// ```
43+ #[ stable( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION" ) ]
44+ pub fn utf8_chunks ( & self ) -> Utf8Chunks < ' _ > {
45+ Utf8Chunks { source : self }
46+ }
47+ }
48+
949/// An item returned by the [`Utf8Chunks`] iterator.
1050///
1151/// A `Utf8Chunk` stores a sequence of [`u8`] up to the first broken character
@@ -14,23 +54,19 @@ use super::validations::utf8_char_width;
1454/// # Examples
1555///
1656/// ```
17- /// #![feature(utf8_chunks)]
18- ///
19- /// use std::str::Utf8Chunks;
20- ///
2157/// // An invalid UTF-8 string
2258/// let bytes = b"foo\xF1\x80bar";
2359///
2460/// // Decode the first `Utf8Chunk`
25- /// let chunk = Utf8Chunks::new( bytes).next().unwrap();
61+ /// let chunk = bytes.utf8_chunks( ).next().unwrap();
2662///
2763/// // The first three characters are valid UTF-8
2864/// assert_eq!("foo", chunk.valid());
2965///
3066/// // The fourth character is broken
3167/// assert_eq!(b"\xF1\x80", chunk.invalid());
3268/// ```
33- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
69+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
3470#[ derive( Clone , Debug , PartialEq , Eq ) ]
3571pub struct Utf8Chunk < ' a > {
3672 valid : & ' a str ,
@@ -43,7 +79,7 @@ impl<'a> Utf8Chunk<'a> {
4379 /// This substring can be empty at the start of the string or between
4480 /// broken UTF-8 characters.
4581 #[ must_use]
46- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
82+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
4783 pub fn valid ( & self ) -> & ' a str {
4884 self . valid
4985 }
@@ -63,7 +99,7 @@ impl<'a> Utf8Chunk<'a> {
6399 /// [`valid`]: Self::valid
64100 /// [`U+FFFD REPLACEMENT CHARACTER`]: crate::char::REPLACEMENT_CHARACTER
65101 #[ must_use]
66- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
102+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
67103 pub fn invalid ( & self ) -> & ' a [ u8 ] {
68104 self . invalid
69105 }
@@ -78,7 +114,7 @@ impl fmt::Debug for Debug<'_> {
78114 fn fmt ( & self , f : & mut Formatter < ' _ > ) -> fmt:: Result {
79115 f. write_char ( '"' ) ?;
80116
81- for chunk in Utf8Chunks :: new ( self . 0 ) {
117+ for chunk in self . 0 . utf8_chunks ( ) {
82118 // Valid part.
83119 // Here we partially parse UTF-8 again which is suboptimal.
84120 {
@@ -123,12 +159,8 @@ impl fmt::Debug for Debug<'_> {
123159/// [`String::from_utf8_lossy`] without allocating heap memory:
124160///
125161/// ```
126- /// #![feature(utf8_chunks)]
127- ///
128- /// use std::str::Utf8Chunks;
129- ///
130162/// fn from_utf8_lossy<F>(input: &[u8], mut push: F) where F: FnMut(&str) {
131- /// for chunk in Utf8Chunks::new( input) {
163+ /// for chunk in input.utf8_chunks( ) {
132164/// push(chunk.valid());
133165///
134166/// if !chunk.invalid().is_empty() {
@@ -140,27 +172,21 @@ impl fmt::Debug for Debug<'_> {
140172///
141173/// [`String::from_utf8_lossy`]: ../../std/string/struct.String.html#method.from_utf8_lossy
142174#[ must_use = "iterators are lazy and do nothing unless consumed" ]
143- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
175+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
144176#[ derive( Clone ) ]
145177pub struct Utf8Chunks < ' a > {
146178 source : & ' a [ u8 ] ,
147179}
148180
149181impl < ' a > Utf8Chunks < ' a > {
150- /// Creates a new iterator to decode the bytes.
151- #[ unstable( feature = "utf8_chunks" , issue = "99543" ) ]
152- pub fn new ( bytes : & ' a [ u8 ] ) -> Self {
153- Self { source : bytes }
154- }
155-
156182 #[ doc( hidden) ]
157183 #[ unstable( feature = "str_internals" , issue = "none" ) ]
158184 pub fn debug ( & self ) -> Debug < ' _ > {
159185 Debug ( self . source )
160186 }
161187}
162188
163- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
189+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
164190impl < ' a > Iterator for Utf8Chunks < ' a > {
165191 type Item = Utf8Chunk < ' a > ;
166192
@@ -259,10 +285,10 @@ impl<'a> Iterator for Utf8Chunks<'a> {
259285 }
260286}
261287
262- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
288+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
263289impl FusedIterator for Utf8Chunks < ' _ > { }
264290
265- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
291+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
266292impl fmt:: Debug for Utf8Chunks < ' _ > {
267293 fn fmt ( & self , f : & mut Formatter < ' _ > ) -> fmt:: Result {
268294 f. debug_struct ( "Utf8Chunks" ) . field ( "source" , & self . debug ( ) ) . finish ( )
0 commit comments