1- //! Utilities for validating string and char literals and turning them into
2- //! values they represent.
1+ //! Utilities for validating (raw) string, char, and byte literals and
2+ //! turning escape sequences into the values they represent.
33
44use std:: ffi:: CStr ;
55use std:: ops:: Range ;
@@ -8,9 +8,9 @@ use std::str::Chars;
88#[ cfg( test) ]
99mod tests;
1010
11- /// Errors and warnings that can occur during string unescaping. They mostly
12- /// relate to malformed escape sequences, but there are a few that are about
13- /// other problems.
11+ /// Errors and warnings that can occur during string, char, and byte unescaping.
12+ ///
13+ /// They mostly relate to malformed escape sequences, but a few cover other problems.
1414#[ derive( Debug , PartialEq , Eq ) ]
1515pub enum EscapeError {
1616 /// Expected 1 char, but 0 were found.
@@ -58,7 +58,7 @@ pub enum EscapeError {
5858 /// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
5959 NonAsciiCharInByte ,
6060
61- // `\0` in a C string literal.
61+ /// `\0` in a C string literal.
6262 NulInCStr ,
6363
6464 /// After a line ending with '\', the next line contains whitespace
@@ -79,6 +79,8 @@ impl EscapeError {
7979 }
8080}
8181
82+ /// Check a raw string literal for validity
83+ ///
8284/// Takes the contents of a raw string literal (without quotes)
8385/// and produces a sequence of characters or errors,
8486/// which are returned by invoking `callback`.
@@ -87,6 +89,8 @@ pub fn check_raw_str(src: &str, callback: impl FnMut(Range<usize>, Result<char,
8789 str:: check_raw ( src, callback) ;
8890}
8991
92+ /// Check a raw byte string literal for validity
93+ ///
9094/// Takes the contents of a raw byte string literal (without quotes)
9195/// and produces a sequence of bytes or errors,
9296/// which are returned by invoking `callback`.
@@ -95,6 +99,8 @@ pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u
9599 <[ u8 ] >:: check_raw ( src, callback) ;
96100}
97101
102+ /// Check a raw C string literal for validity
103+ ///
98104/// Takes the contents of a raw C string literal (without quotes)
99105/// and produces a sequence of characters or errors,
100106/// which are returned by invoking `callback`.
@@ -103,7 +109,7 @@ pub fn check_raw_c_str(src: &str, callback: impl FnMut(Range<usize>, Result<char
103109 CStr :: check_raw ( src, callback) ;
104110}
105111
106- /// trait for checking raw strings
112+ /// Trait for checking raw string literals for validity
107113trait CheckRaw {
108114 /// Unit type of the implementing string type (`char` for string, `u8` for byte string)
109115 type RawUnit ;
@@ -161,6 +167,7 @@ impl CheckRaw for [u8] {
161167 }
162168}
163169
170+ /// Turn an ascii char into a byte
164171fn char2byte ( c : char ) -> Result < u8 , EscapeError > {
165172 // do NOT do: c.try_into().ok_or(EscapeError::NonAsciiCharInByte)
166173 if c. is_ascii ( ) {
@@ -182,32 +189,42 @@ impl CheckRaw for CStr {
182189 }
183190}
184191
192+ /// Unescape a char literal
193+ ///
185194/// Takes the contents of a char literal (without quotes),
186195/// and returns an unescaped char or an error.
187196pub fn unescape_char ( src : & str ) -> Result < char , EscapeError > {
188197 str:: unescape_single ( & mut src. chars ( ) )
189198}
190199
200+ /// Unescape a byte literal
201+ ///
191202/// Takes the contents of a byte literal (without quotes),
192203/// and returns an unescaped byte or an error.
193204pub fn unescape_byte ( src : & str ) -> Result < u8 , EscapeError > {
194205 <[ u8 ] >:: unescape_single ( & mut src. chars ( ) )
195206}
196207
208+ /// Unescape a string literal
209+ ///
197210/// Takes the contents of a string literal (without quotes)
198211/// and produces a sequence of unescaped characters or errors,
199212/// which are returned by invoking `callback`.
200213pub fn unescape_str ( src : & str , callback : impl FnMut ( Range < usize > , Result < char , EscapeError > ) ) {
201214 str:: unescape ( src, callback)
202215}
203216
217+ /// Unescape a byte string literal
218+ ///
204219/// Takes the contents of a byte string literal (without quotes)
205220/// and produces a sequence of unescaped bytes or errors,
206221/// which are returned by invoking `callback`.
207222pub fn unescape_byte_str ( src : & str , callback : impl FnMut ( Range < usize > , Result < u8 , EscapeError > ) ) {
208223 <[ u8 ] >:: unescape ( src, callback)
209224}
210225
226+ /// Unescape a C string literal
227+ ///
211228/// Takes the contents of a C string literal (without quotes)
212229/// and produces a sequence of unescaped `MixedUnit`s or errors,
213230/// which are returned by invoking `callback`.
@@ -218,6 +235,8 @@ pub fn unescape_c_str(
218235 CStr :: unescape ( src, callback)
219236}
220237
238+ /// Enum representing either a char or a byte
239+ ///
221240/// Used for mixed utf8 string literals, i.e. those that allow both unicode
222241/// chars and high bytes.
223242#[ derive( Copy , Clone , Debug , PartialEq , Eq ) ]
@@ -254,7 +273,7 @@ impl From<u8> for MixedUnit {
254273 }
255274}
256275
257- /// trait for unescaping escape sequences in strings
276+ /// Trait for unescaping escape sequences in strings
258277trait Unescape {
259278 /// Unit type of the implementing string type (`char` for string, `u8` for byte string)
260279 type Unit : From < u8 > ;
@@ -307,7 +326,9 @@ trait Unescape {
307326 }
308327 }
309328
310- /// Takes the contents of a raw literal (without quotes)
329+ /// Unescape a string literal
330+ ///
331+ /// Takes the contents of a (non-raw) string literal (without quotes)
311332 /// and produces a sequence of `Result<Self::Unit, EscapeError>`
312333 /// which are returned via `callback`.
313334 fn unescape (
@@ -340,7 +361,9 @@ trait Unescape {
340361 }
341362}
342363
343- /// Parse the character of an ASCII escape (except nul) without the leading backslash.
364+ /// Interpret a non-nul ASCII escape
365+ ///
366+ /// Parses the character of an ASCII escape (except nul) without the leading backslash.
344367fn simple_escape ( c : char ) -> Result < u8 , char > {
345368 // Previous character was '\\', unescape what follows.
346369 Ok ( match c {
@@ -354,7 +377,9 @@ fn simple_escape(c: char) -> Result<u8, char> {
354377 } )
355378}
356379
357- /// Parse the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
380+ /// Interpret a hexadecimal escape
381+ ///
382+ /// Parses the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
358383fn hex_escape ( chars : & mut impl Iterator < Item = char > ) -> Result < u8 , EscapeError > {
359384 let hi = chars. next ( ) . ok_or ( EscapeError :: TooShortHexEscape ) ?;
360385 let hi = hi. to_digit ( 16 ) . ok_or ( EscapeError :: InvalidCharInHexEscape ) ?;
@@ -365,6 +390,8 @@ fn hex_escape(chars: &mut impl Iterator<Item = char>) -> Result<u8, EscapeError>
365390 Ok ( ( hi * 16 + lo) as u8 )
366391}
367392
393+ /// Interpret a unicode escape
394+ ///
368395/// Parses the brace-delimited part of a unicode escape: hexadecimal characters (and underscores) inside braces.
369396/// This r"{...}" normally comes after r"\u" and cannot start with an underscore.
370397fn unicode_escape ( chars : & mut impl Iterator < Item = char > ) -> Result < u32 , EscapeError > {
@@ -412,6 +439,8 @@ fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeE
412439 }
413440}
414441
442+ /// Interpret a [string continuation escape](https://doc.rust-lang.org/reference/expressions/literal-expr.html#string-continuation-escapes)
443+ ///
415444/// Skip ASCII whitespace, except for the formfeed character
416445/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
417446/// Warns on unescaped newline and following non-ASCII whitespace.
@@ -513,7 +542,7 @@ impl Unescape for CStr {
513542 }
514543}
515544
516- /// What kind of literal do we parse.
545+ /// Enum of the different kinds of literal
517546#[ derive( Debug , Clone , Copy , PartialEq ) ]
518547pub enum Mode {
519548 Char ,
@@ -552,10 +581,14 @@ impl Mode {
552581 }
553582}
554583
584+ /// Check a literal only for errors
585+ ///
555586/// Takes the contents of a literal (without quotes)
556- /// and produces a sequence of errors,
587+ /// and produces a sequence of only errors,
557588/// which are returned by invoking `error_callback`.
558- pub fn unescape_for_errors (
589+ ///
590+ /// NB: Does not produce any output other than errors.
591+ pub fn check_for_errors (
559592 src : & str ,
560593 mode : Mode ,
561594 mut error_callback : impl FnMut ( Range < usize > , EscapeError ) ,
0 commit comments