@@ -44,6 +44,12 @@ pub struct UnmatchedBrace {
4444 pub candidate_span : Option < Span > ,
4545}
4646
/// Selects which flavor of raw string literal `scan_raw_string` is lexing.
///
/// The two flavors share the same `#`-delimited syntax; they differ only in
/// the per-character validation applied to the body and in the literal kind
/// that is produced.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RawStringType {
    /// A raw string literal (`r"..."`): produces `token::StrRaw` and rejects
    /// a carriage return that is not followed by a line feed.
    Unicode,
    /// A raw byte string literal (`br"..."`): produces `token::ByteStrRaw`
    /// and reports any character above `'\x7F'` as non-ASCII.
    Byte,
}
52+
4753pub struct StringReader < ' a > {
4854 crate sess : & ' a ParseSess ,
4955 /// The absolute offset within the source_map of the next character to read
@@ -1122,11 +1128,10 @@ impl<'a> StringReader<'a> {
11221128 self . validate_byte_str_escape ( start_with_quote) ;
11231129 ( token:: ByteStr , symbol)
11241130 } ,
1125- Some ( 'r' ) => self . scan_raw_byte_string ( ) ,
1131+ Some ( 'r' ) => self . scan_raw_string ( RawStringType :: Byte ) ,
11261132 _ => unreachable ! ( ) , // Should have been a token::Ident above.
11271133 } ;
11281134 let suffix = self . scan_optional_raw_name ( ) ;
1129-
11301135 Ok ( Token :: lit ( kind, symbol, suffix) )
11311136 }
11321137 '"' => {
@@ -1138,100 +1143,9 @@ impl<'a> StringReader<'a> {
11381143 Ok ( Token :: lit ( token:: Str , symbol, suffix) )
11391144 }
11401145 'r' => {
1141- let start_bpos = self . pos ;
1142- self . bump ( ) ;
1143- let mut hash_count: u16 = 0 ;
1144- while self . ch_is ( '#' ) {
1145- if hash_count == 65535 {
1146- let bpos = self . next_pos ;
1147- self . fatal_span_ ( start_bpos,
1148- bpos,
1149- "too many `#` symbols: raw strings may be \
1150- delimited by up to 65535 `#` symbols") . raise ( ) ;
1151- }
1152- self . bump ( ) ;
1153- hash_count += 1 ;
1154- }
1155-
1156- if self . is_eof ( ) {
1157- self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1158- } else if !self . ch_is ( '"' ) {
1159- let last_bpos = self . pos ;
1160- let curr_char = self . ch . unwrap ( ) ;
1161- self . fatal_span_char ( start_bpos,
1162- last_bpos,
1163- "found invalid character; only `#` is allowed \
1164- in raw string delimitation",
1165- curr_char) . raise ( ) ;
1166- }
1167- self . bump ( ) ;
1168- let content_start_bpos = self . pos ;
1169- let mut content_end_bpos;
1170- let mut valid = true ;
1171- ' outer: loop {
1172- if self . is_eof ( ) {
1173- self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1174- }
1175- let c = self . ch . unwrap ( ) ;
1176- match c {
1177- '"' => {
1178- content_end_bpos = self . pos ;
1179- for _ in 0 ..hash_count {
1180- self . bump ( ) ;
1181- if !self . ch_is ( '#' ) {
1182- continue ' outer;
1183- }
1184- }
1185- break ;
1186- }
1187- '\r' => {
1188- if !self . nextch_is ( '\n' ) {
1189- let last_bpos = self . pos ;
1190- self . err_span_ ( start_bpos,
1191- last_bpos,
1192- "bare CR not allowed in raw string, use \\ r \
1193- instead") ;
1194- valid = false ;
1195- }
1196- }
1197- _ => ( ) ,
1198- }
1199- self . bump ( ) ;
1200- }
1201-
1202- self . bump ( ) ;
1203- if self . ch_is ( '#' ) {
1204- let lo = self . pos ;
1205- while self . ch_is ( '#' ) {
1206- self . bump ( ) ;
1207- }
1208-
1209- let sp = self . mk_sp ( start_bpos, self . pos ) ;
1210- let sp_beg = self . mk_sp ( BytePos ( start_bpos. 0 + 1 ) , BytePos ( start_bpos. 0 + 1 + hash_count as u32 ) ) ;
1211- let sp_end = self . mk_sp ( BytePos ( lo. 0 - hash_count as u32 ) , self . pos ) ;
1212-
1213- let mut err = self . sess . span_diagnostic . struct_span_err ( sp, "too many `#` when terminating raw string" ) ;
1214- err. span_label ( sp_beg, format ! ( "The raw string has {} leading `#`..." , hash_count) ) ;
1215- err. span_label ( sp_end, format ! ( "...but is closed with {}." , self . pos. 0 - lo. 0 + hash_count as u32 ) ) ;
1216- err. span_suggestion_hidden (
1217- self . mk_sp ( lo, self . pos ) ,
1218- "remove the unneeded `#`" ,
1219- String :: new ( ) ,
1220- Applicability :: MachineApplicable ,
1221- ) ;
1222-
1223- err. emit ( ) ;
1224- valid = false ;
1225- }
1226-
1227- let symbol = if valid {
1228- self . name_from_to ( content_start_bpos, content_end_bpos)
1229- } else {
1230- Symbol :: intern ( "??" )
1231- } ;
1146+ let ( lit, symbol) = self . scan_raw_string ( RawStringType :: Unicode ) ;
12321147 let suffix = self . scan_optional_raw_name ( ) ;
1233-
1234- Ok ( Token :: lit ( token:: StrRaw ( hash_count) , symbol, suffix) )
1148+ Ok ( Token :: lit ( lit, symbol, suffix) )
12351149 }
12361150 '-' => {
12371151 if self . nextch_is ( '>' ) {
@@ -1385,42 +1299,44 @@ impl<'a> StringReader<'a> {
13851299 id
13861300 }
13871301
1388- fn scan_raw_byte_string ( & mut self ) -> ( token:: LitKind , Symbol ) {
1302+ fn scan_raw_string ( & mut self , raw_type : RawStringType ) -> ( token:: LitKind , Symbol ) {
13891303 let start_bpos = self . pos ;
13901304 self . bump ( ) ;
1391- let mut hash_count = 0 ;
1305+ let mut hash_count: u16 = 0 ;
13921306 while self . ch_is ( '#' ) {
13931307 if hash_count == 65535 {
13941308 let bpos = self . next_pos ;
13951309 self . fatal_span_ ( start_bpos,
13961310 bpos,
1397- "too many `#` symbols: raw byte strings may be \
1311+ "too many `#` symbols: raw strings may be \
13981312 delimited by up to 65535 `#` symbols") . raise ( ) ;
13991313 }
14001314 self . bump ( ) ;
14011315 hash_count += 1 ;
14021316 }
14031317
1404- if self . is_eof ( ) {
1405- self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1406- } else if !self . ch_is ( '"' ) {
1407- let pos = self . pos ;
1408- let ch = self . ch . unwrap ( ) ;
1409- self . fatal_span_char ( start_bpos,
1410- pos,
1411- "found invalid character; only `#` is allowed in raw \
1412- string delimitation",
1413- ch) . raise ( ) ;
1318+ match self . ch {
1319+ None => self . fail_unterminated_raw_string ( start_bpos, hash_count, vec ! [ ] ) ,
1320+ Some ( '"' ) => { } ,
1321+ Some ( c) => {
1322+ let last_bpos = self . pos ;
1323+ self . fatal_span_char ( start_bpos,
1324+ last_bpos,
1325+ "found invalid character; only `#` is allowed \
1326+ in raw string delimitation",
1327+ c) . raise ( ) ;
1328+ }
14141329 }
1330+
14151331 self . bump ( ) ;
14161332 let content_start_bpos = self . pos ;
14171333 let mut content_end_bpos;
1334+ let mut valid = true ;
1335+
14181336 ' outer: loop {
1419- match self . ch {
1420- None => {
1421- self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1422- }
1423- Some ( '"' ) => {
1337+ match ( self . ch , raw_type) {
1338+ ( None , _) => self . fail_unterminated_raw_string ( start_bpos, hash_count) ,
1339+ ( Some ( '"' ) , _) => {
14241340 content_end_bpos = self . pos ;
14251341 for _ in 0 ..hash_count {
14261342 self . bump ( ) ;
@@ -1430,19 +1346,66 @@ impl<'a> StringReader<'a> {
14301346 }
14311347 break ;
14321348 }
1433- Some ( c) => {
1349+ ( Some ( '\r' ) , RawStringType :: Unicode ) => {
1350+ if !self . nextch_is ( '\n' ) {
1351+ let last_bpos = self . pos ;
1352+ self . err_span_ ( start_bpos,
1353+ last_bpos,
1354+ "bare CR not allowed in raw string, use \\ r \
1355+ instead") ;
1356+ valid = false ;
1357+ }
1358+ }
1359+ ( Some ( c) , RawStringType :: Byte ) => {
14341360 if c > '\x7F' {
14351361 let pos = self . pos ;
14361362 self . err_span_char ( pos, pos, "raw byte string must be ASCII" , c) ;
14371363 }
14381364 }
1365+ _ => ( ) ,
14391366 }
14401367 self . bump ( ) ;
14411368 }
14421369
14431370 self . bump ( ) ;
1371+ if self . ch_is ( '#' ) {
1372+ let lo = self . pos ;
1373+ while self . ch_is ( '#' ) {
1374+ self . bump ( ) ;
1375+ }
1376+
1377+ let sp = self . mk_sp ( start_bpos, self . pos ) ;
1378+ let sp_beg = self . mk_sp ( BytePos ( start_bpos. 0 + 1 ) ,
1379+ BytePos ( start_bpos. 0 + 1 + hash_count as u32 ) ) ;
1380+ let sp_end = self . mk_sp ( BytePos ( lo. 0 - hash_count as u32 ) , self . pos ) ;
1381+
1382+ let mut err = self . sess
1383+ . span_diagnostic . struct_span_err ( sp, "too many `#` when terminating raw string" ) ;
1384+ err. span_label ( sp_beg, format ! ( "The raw string has {} leading `#`..." , hash_count) ) ;
1385+ err. span_label ( sp_end,
1386+ format ! ( "...but is closed with {}." ,
1387+ self . pos. 0 - lo. 0 + hash_count as u32 ) ) ;
1388+ err. span_suggestion_hidden (
1389+ self . mk_sp ( lo, self . pos ) ,
1390+ "remove the unneeded `#`" ,
1391+ String :: new ( ) ,
1392+ Applicability :: MachineApplicable ,
1393+ ) ;
14441394
1445- ( token:: ByteStrRaw ( hash_count) , self . name_from_to ( content_start_bpos, content_end_bpos) )
1395+ err. emit ( ) ;
1396+ valid = false ;
1397+ }
1398+
1399+ let symbol = if valid {
1400+ self . name_from_to ( content_start_bpos, content_end_bpos)
1401+ } else {
1402+ Symbol :: intern ( "??" )
1403+ } ;
1404+
1405+ match raw_type {
1406+ RawStringType :: Unicode => ( token:: StrRaw ( hash_count) , symbol) ,
1407+ RawStringType :: Byte => ( token:: ByteStrRaw ( hash_count) , symbol) ,
1408+ }
14461409 }
14471410
14481411 fn validate_char_escape ( & self , start_with_quote : BytePos ) {
0 commit comments