1- //! Utilities for validating string and char literals and turning them into
2- //! values they represent.
1+ //! Utilities for validating (raw) string, char, and byte literals and
2+ //! turning escape sequences into the values they represent.
33
44use std:: ffi:: CStr ;
55use std:: ops:: Range ;
@@ -8,9 +8,9 @@ use std::str::Chars;
88#[ cfg( test) ]
99mod tests;
1010
11- /// Errors and warnings that can occur during string unescaping. They mostly
12- /// relate to malformed escape sequences, but there are a few that are about
13- /// other problems.
11+ /// Errors and warnings that can occur during string, char, and byte unescaping.
12+ ///
13+ /// They mostly relate to malformed escape sequences, but a few concern other problems.
1414#[ derive( Debug , PartialEq , Eq ) ]
1515pub enum EscapeError {
1616 /// Expected 1 char, but 0 were found.
@@ -58,7 +58,7 @@ pub enum EscapeError {
5858 /// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
5959 NonAsciiCharInByte ,
6060
61- // `\0` in a C string literal.
61+ /// `\0` in a C string literal.
6262 NulInCStr ,
6363
6464 /// After a line ending with '\', the next line contains whitespace
@@ -79,6 +79,8 @@ impl EscapeError {
7979 }
8080}
8181
82+ /// Check a raw string literal for validity
83+ ///
8284/// Takes the contents of a raw string literal (without quotes)
8385/// and produces a sequence of characters or errors,
8486/// which are returned by invoking `callback`.
@@ -87,6 +89,8 @@ pub fn check_raw_str(src: &str, callback: impl FnMut(Range<usize>, Result<char,
8789 str:: check_raw ( src, callback) ;
8890}
8991
92+ /// Check a raw byte string literal for validity
93+ ///
9094/// Takes the contents of a raw byte string literal (without quotes)
9195/// and produces a sequence of bytes or errors,
9296/// which are returned by invoking `callback`.
@@ -95,6 +99,8 @@ pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u
9599 <[ u8 ] >:: check_raw ( src, callback) ;
96100}
97101
102+ /// Check a raw C string literal for validity
103+ ///
98104/// Takes the contents of a raw C string literal (without quotes)
99105/// and produces a sequence of characters or errors,
100106/// which are returned by invoking `callback`.
@@ -103,7 +109,7 @@ pub fn check_raw_c_str(src: &str, callback: impl FnMut(Range<usize>, Result<char
103109 CStr :: check_raw ( src, callback) ;
104110}
105111
106- /// trait for checking raw strings
112+ /// Trait for checking raw string literals for validity
107113trait CheckRaw {
108114 /// Unit type of the implementing string type (`char` for string, `u8` for byte string)
109115 type RawUnit ;
@@ -149,6 +155,7 @@ impl CheckRaw for [u8] {
149155 }
150156}
151157
158+ /// Turn an ascii char into a byte
152159fn char2byte ( c : char ) -> Result < u8 , EscapeError > {
153160 // do NOT do: c.try_into().ok_or(EscapeError::NonAsciiCharInByte)
154161 if c. is_ascii ( ) {
@@ -170,32 +177,42 @@ impl CheckRaw for CStr {
170177 }
171178}
172179
180+ /// Unescape a char literal
181+ ///
173182/// Takes the contents of a char literal (without quotes),
174183/// and returns an unescaped char or an error.
175184pub fn unescape_char ( src : & str ) -> Result < char , EscapeError > {
176185 str:: unescape_single ( & mut src. chars ( ) )
177186}
178187
188+ /// Unescape a byte literal
189+ ///
179190/// Takes the contents of a byte literal (without quotes),
180191/// and returns an unescaped byte or an error.
181192pub fn unescape_byte ( src : & str ) -> Result < u8 , EscapeError > {
182193 <[ u8 ] >:: unescape_single ( & mut src. chars ( ) )
183194}
184195
196+ /// Unescape a string literal
197+ ///
185198/// Takes the contents of a string literal (without quotes)
186199/// and produces a sequence of unescaped characters or errors,
187200/// which are returned by invoking `callback`.
188201pub fn unescape_str ( src : & str , callback : impl FnMut ( Range < usize > , Result < char , EscapeError > ) ) {
189202 str:: unescape ( src, callback)
190203}
191204
205+ /// Unescape a byte string literal
206+ ///
192207/// Takes the contents of a byte string literal (without quotes)
193208/// and produces a sequence of unescaped bytes or errors,
194209/// which are returned by invoking `callback`.
195210pub fn unescape_byte_str ( src : & str , callback : impl FnMut ( Range < usize > , Result < u8 , EscapeError > ) ) {
196211 <[ u8 ] >:: unescape ( src, callback)
197212}
198213
214+ /// Unescape a C string literal
215+ ///
199216/// Takes the contents of a C string literal (without quotes)
200217/// and produces a sequence of unescaped MixedUnits or errors,
201218/// which are returned by invoking `callback`.
@@ -206,6 +223,8 @@ pub fn unescape_c_str(
206223 CStr :: unescape ( src, callback)
207224}
208225
226+ /// Enum representing either a char or a byte
227+ ///
209228/// Used for mixed utf8 string literals, i.e. those that allow both unicode
210229/// chars and high bytes.
211230#[ derive( Copy , Clone , Debug , PartialEq , Eq ) ]
@@ -242,7 +261,7 @@ impl From<u8> for MixedUnit {
242261 }
243262}
244263
245- /// trait for unescaping escape sequences in strings
264+ /// Trait for unescaping escape sequences in strings
246265trait Unescape {
247266 /// Unit type of the implementing string type (`char` for string, `u8` for byte string)
248267 type Unit : From < u8 > ;
@@ -295,7 +314,9 @@ trait Unescape {
295314 }
296315 }
297316
298- /// Takes the contents of a raw literal (without quotes)
317+ /// Unescape a string literal
318+ ///
319+ /// Takes the contents of a string literal (without quotes)
299320 /// and produces a sequence of `Result<Self::Unit, EscapeError>`
300321 /// which are returned via `callback`.
301322 fn unescape (
@@ -328,7 +349,9 @@ trait Unescape {
328349 }
329350}
330351
331- /// Parse the character of an ASCII escape (except nul) without the leading backslash.
352+ /// Interpret a non-nul ASCII escape
353+ ///
354+ /// Parses the character of an ASCII escape (except nul) without the leading backslash.
332355fn simple_escape ( c : char ) -> Result < u8 , char > {
333356 // Previous character was '\\', unescape what follows.
334357 Ok ( match c {
@@ -342,7 +365,9 @@ fn simple_escape(c: char) -> Result<u8, char> {
342365 } )
343366}
344367
345- /// Parse the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
368+ /// Interpret a hexadecimal escape
369+ ///
370+ /// Parses the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
346371fn hex_escape ( chars : & mut impl Iterator < Item = char > ) -> Result < u8 , EscapeError > {
347372 let hi = chars. next ( ) . ok_or ( EscapeError :: TooShortHexEscape ) ?;
348373 let hi = hi. to_digit ( 16 ) . ok_or ( EscapeError :: InvalidCharInHexEscape ) ?;
@@ -353,6 +378,8 @@ fn hex_escape(chars: &mut impl Iterator<Item = char>) -> Result<u8, EscapeError>
353378 Ok ( ( hi * 16 + lo) as u8 )
354379}
355380
381+ /// Interpret a unicode escape
382+ ///
356383/// Parses the braced part of a unicode escape, i.e. the hexadecimal characters (and underscores) enclosed in braces.
357384/// This r"{...}" normally comes after r"\u" and cannot start with an underscore.
358385fn unicode_escape ( chars : & mut impl Iterator < Item = char > ) -> Result < u32 , EscapeError > {
@@ -400,6 +427,8 @@ fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeE
400427 }
401428}
402429
430+ /// Interpret a [string continuation escape](https://doc.rust-lang.org/reference/expressions/literal-expr.html#string-continuation-escapes)
431+ ///
403432/// Skip ASCII whitespace, except for the formfeed character
404433/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
405434/// Warns on unescaped newline and following non-ASCII whitespace.
@@ -501,7 +530,7 @@ impl Unescape for CStr {
501530 }
502531}
503532
504- /// What kind of literal do we parse.
533+ /// Enum of the different kinds of literal
505534#[ derive( Debug , Clone , Copy , PartialEq ) ]
506535pub enum Mode {
507536 Char ,
@@ -540,10 +569,14 @@ impl Mode {
540569 }
541570}
542571
572+ /// Check a literal only for errors
573+ ///
543574/// Takes the contents of a literal (without quotes)
544- /// and produces a sequence of errors,
575+ /// and produces a sequence of only errors,
545576/// which are returned by invoking `error_callback`.
546- pub fn unescape_for_errors (
577+ ///
578+ /// NB Does not produce any output other than errors
579+ pub fn check_for_errors (
547580 src : & str ,
548581 mode : Mode ,
549582 mut error_callback : impl FnMut ( Range < usize > , EscapeError ) ,
0 commit comments