|
1 | 1 | //! Utilities for validating string and char literals and turning them into |
2 | 2 | //! values they represent. |
3 | 3 |
|
| 4 | +use std::ffi::CStr; |
4 | 5 | use std::ops::Range; |
5 | 6 | use std::str::Chars; |
6 | 7 |
|
@@ -138,37 +139,94 @@ pub fn unescape_for_errors( |
138 | 139 | /// and produces a sequence of characters or errors, |
139 | 140 | /// which are returned by invoking `callback`. |
140 | 141 | /// NOTE: Does no escaping, but produces errors for bare carriage return ('\r'). |
141 | | -pub fn check_raw_str(src: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) { |
142 | | - check_raw_common(src, Mode::RawStr, &mut callback) |
| 142 | +pub fn check_raw_str(src: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) { |
| 143 | + str::check_raw(src, callback); |
143 | 144 | } |
144 | 145 |
|
145 | 146 | /// Takes the contents of a raw byte string literal (without quotes) |
146 | 147 | /// and produces a sequence of bytes or errors, |
147 | 148 | /// which are returned by invoking `callback`. |
148 | 149 | /// NOTE: Does no escaping, but produces errors for bare carriage return ('\r'). |
149 | | -pub fn check_raw_byte_str( |
150 | | - src: &str, |
151 | | - mut callback: impl FnMut(Range<usize>, Result<u8, EscapeError>), |
152 | | -) { |
153 | | - check_raw_common(src, Mode::RawByteStr, &mut |r, res| { |
154 | | - callback(r, res.map(byte_from_char)) |
155 | | - }) |
| 150 | +pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u8, EscapeError>)) { |
| 151 | + <[u8]>::check_raw(src, callback); |
156 | 152 | } |
157 | 153 |
|
158 | 154 | /// Takes the contents of a raw C string literal (without quotes) |
159 | 155 | /// and produces a sequence of characters or errors, |
160 | 156 | /// which are returned by invoking `callback`. |
161 | 157 | /// NOTE: Does no escaping, but produces errors for bare carriage return ('\r'). |
162 | | -pub fn check_raw_c_str( |
163 | | - src: &str, |
164 | | - mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>), |
165 | | -) { |
166 | | - check_raw_common(src, Mode::RawCStr, &mut |r, mut result| { |
167 | | - if let Ok('\0') = result { |
168 | | - result = Err(EscapeError::NulInCStr); |
| 158 | +pub fn check_raw_c_str(src: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) { |
| 159 | + CStr::check_raw(src, callback); |
| 160 | +} |
| 161 | + |
| 162 | +/// Trait implemented by the string types (`str`, `[u8]`, `CStr`) whose raw literals can be checked. |
| 163 | +trait CheckRaw { |
| 164 | + /// Unit type of the implementing string type (`char` for string and C string, `u8` for byte string) |
| 165 | + type RawUnit; |
| 166 | + |
| 167 | + /// Converts chars to the unit type of the literal type |
| 168 | + fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError>; |
| 169 | + |
| 170 | + /// Takes the contents of a raw literal (without quotes) |
| 171 | + /// and produces a sequence of `Result<Self::RawUnit, EscapeError>` |
| 172 | + /// which are returned via `callback`. |
| 173 | + /// |
| 174 | + /// NOTE: Does no escaping, but produces errors for bare carriage return ('\r'). |
| 175 | + fn check_raw( |
| 176 | + src: &str, |
| 177 | + mut callback: impl FnMut(Range<usize>, Result<Self::RawUnit, EscapeError>), |
| 178 | + ) { |
| 179 | + let mut chars = src.chars(); |
| 180 | + while let Some(c) = chars.next() { |
| 181 | + let start = src.len() - chars.as_str().len() - c.len_utf8(); |
| 182 | + let res = match c { |
| 183 | + '\r' => Err(EscapeError::BareCarriageReturnInRawString), |
| 184 | + _ => Self::char2raw_unit(c), |
| 185 | + }; |
| 186 | + let end = src.len() - chars.as_str().len(); |
| 187 | + callback(start..end, res); |
169 | 188 | } |
170 | | - callback(r, result) |
171 | | - }) |
| 189 | + |
| 190 | + // NOTE: it is unclear whether the following equivalent `char_indices` version is slower or faster than the loop above (see bug 141855): |
| 191 | + // src.char_indices().for_each(|(pos, c)| { |
| 192 | + // callback( |
| 193 | + // pos..pos + c.len_utf8(), |
| 194 | + // if c == '\r' { |
| 195 | + // Err(EscapeError::BareCarriageReturnInRawString) |
| 196 | + // } else { |
| 197 | + // Self::char2raw_unit(c) |
| 198 | + // }, |
| 199 | + // ); |
| 200 | + // }); |
| 201 | + } |
| 202 | +} |
| 203 | + |
| 204 | +impl CheckRaw for str { |
| 205 | + type RawUnit = char; |
| 206 | + |
| 207 | + fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> { |
| 208 | + Ok(c) |
| 209 | + } |
| 210 | +} |
| 211 | + |
| 212 | +impl CheckRaw for [u8] { |
| 213 | + type RawUnit = u8; |
| 214 | + |
| 215 | + fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> { |
| 216 | + char2byte(c) |
| 217 | + } |
| 218 | +} |
| 219 | + |
| 220 | +impl CheckRaw for CStr { |
| 221 | + type RawUnit = char; |
| 222 | + |
| 223 | + fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> { |
| 224 | + if c == '\0' { |
| 225 | + Err(EscapeError::NulInCStr) |
| 226 | + } else { |
| 227 | + Ok(c) |
| 228 | + } |
| 229 | + } |
172 | 230 | } |
173 | 231 |
|
174 | 232 | /// Takes the contents of a string literal (without quotes) |
@@ -497,34 +555,18 @@ where |
497 | 555 | *chars = tail.chars(); |
498 | 556 | } |
499 | 557 |
|
500 | | -/// Takes a contents of a string literal (without quotes) and produces a |
501 | | -/// sequence of characters or errors. |
502 | | -/// NOTE: Raw strings do not perform any explicit character escaping, here we |
503 | | -/// only produce errors on bare CR. |
504 | | -fn check_raw_common<F>(src: &str, mode: Mode, callback: &mut F) |
505 | | -where |
506 | | - F: FnMut(Range<usize>, Result<char, EscapeError>), |
507 | | -{ |
508 | | - let mut chars = src.chars(); |
509 | | - let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop |
510 | | - |
511 | | - // The `start` and `end` computation here matches the one in |
512 | | - // `unescape_non_raw_common` for consistency, even though this function |
513 | | - // doesn't have to worry about skipping any chars. |
514 | | - while let Some(c) = chars.next() { |
515 | | - let start = src.len() - chars.as_str().len() - c.len_utf8(); |
516 | | - let res = match c { |
517 | | - '\r' => Err(EscapeError::BareCarriageReturnInRawString), |
518 | | - _ => ascii_check(c, allow_unicode_chars), |
519 | | - }; |
520 | | - let end = src.len() - chars.as_str().len(); |
521 | | - callback(start..end, res); |
522 | | - } |
523 | | -} |
524 | | - |
525 | 558 | #[inline] |
526 | 559 | fn byte_from_char(c: char) -> u8 { |
527 | 560 | let res = c as u32; |
528 | 561 | debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr"); |
529 | 562 | res as u8 |
530 | 563 | } |
| 564 | + |
| 565 | +fn char2byte(c: char) -> Result<u8, EscapeError> { |
| 566 | + // do NOT do: c.try_into().ok_or(EscapeError::NonAsciiCharInByte) -- note that `u8::try_from(char)` succeeds for every char up to U+00FF, not only for ASCII |
| 567 | + if c.is_ascii() { |
| 568 | + Ok(c as u8) |
| 569 | + } else { |
| 570 | + Err(EscapeError::NonAsciiCharInByte) |
| 571 | + } |
| 572 | +} |
0 commit comments