@@ -636,6 +636,67 @@ impl<'a> StringReader<'a> {
636636 }
637637 }
638638
639+ /// Scan for a single (possibly escaped) byte or char
640+ /// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
641+ /// `start` is the position of `first_source_char`, which is already consumed.
642+ fn scan_char_or_byte ( & mut self , start : BytePos , first_source_char : char ,
643+ ascii_only : bool , delim : char ) -> Option < char > {
644+ match first_source_char {
645+ '\\' => {
646+ // '\X' for some X must be a character constant:
647+ let escaped = self . curr ;
648+ let escaped_pos = self . last_pos ;
649+ self . bump ( ) ;
650+ match escaped {
651+ None => { } , // EOF here is an error that will be checked later.
652+ Some ( e) => {
653+ return Some ( match e {
654+ 'n' => '\n' ,
655+ 'r' => '\r' ,
656+ 't' => '\t' ,
657+ '\\' => '\\' ,
658+ '\'' => '\'' ,
659+ '"' => '"' ,
660+ '0' => '\x00' ,
661+ 'x' => self . scan_numeric_escape ( 2 u, delim) ,
662+ 'u' if !ascii_only => self . scan_numeric_escape ( 4 u, delim) ,
663+ 'U' if !ascii_only => self . scan_numeric_escape ( 8 u, delim) ,
664+ '\n' if delim == '"' => {
665+ self . consume_whitespace ( ) ;
666+ return None
667+ } ,
668+ c => {
669+ let last_pos = self . last_pos ;
670+ self . err_span_char (
671+ escaped_pos, last_pos,
672+ if ascii_only { "unknown byte escape" }
673+ else { "unknown character escape" } ,
674+ c) ;
675+ c
676+ }
677+ } )
678+ }
679+ }
680+ }
681+ '\t' | '\n' | '\r' | '\'' if delim == '\'' => {
682+ let last_pos = self . last_pos ;
683+ self . err_span_char (
684+ start, last_pos,
685+ if ascii_only { "byte constant must be escaped" }
686+ else { "character constant must be escaped" } ,
687+ first_source_char) ;
688+ }
689+ _ => if ascii_only && first_source_char > '\x7F' {
690+ let last_pos = self . last_pos ;
691+ self . err_span_char (
692+ start, last_pos,
693+ "byte constant must be ASCII. \
694+ Use a \\ xHH escape for a non-ASCII byte", first_source_char) ;
695+ }
696+ }
697+ Some ( first_source_char)
698+ }
699+
639700 fn binop ( & mut self , op : token:: BinOp ) -> token:: Token {
640701 self . bump ( ) ;
641702 if self . curr_is ( '=' ) {
@@ -810,43 +871,7 @@ impl<'a> StringReader<'a> {
810871 }
811872
812873 // Otherwise it is a character constant:
813- match c2 {
814- '\\' => {
815- // '\X' for some X must be a character constant:
816- let escaped = self . curr ;
817- let escaped_pos = self . last_pos ;
818- self . bump ( ) ;
819- match escaped {
820- None => { }
821- Some ( e) => {
822- c2 = match e {
823- 'n' => '\n' ,
824- 'r' => '\r' ,
825- 't' => '\t' ,
826- '\\' => '\\' ,
827- '\'' => '\'' ,
828- '"' => '"' ,
829- '0' => '\x00' ,
830- 'x' => self . scan_numeric_escape ( 2 u, '\'' ) ,
831- 'u' => self . scan_numeric_escape ( 4 u, '\'' ) ,
832- 'U' => self . scan_numeric_escape ( 8 u, '\'' ) ,
833- c2 => {
834- let last_bpos = self . last_pos ;
835- self . err_span_char ( escaped_pos, last_bpos,
836- "unknown character escape" , c2) ;
837- c2
838- }
839- }
840- }
841- }
842- }
843- '\t' | '\n' | '\r' | '\'' => {
844- let last_bpos = self . last_pos ;
845- self . err_span_char ( start, last_bpos,
846- "character constant must be escaped" , c2) ;
847- }
848- _ => { }
849- }
874+ c2 = self . scan_char_or_byte ( start, c2, /* ascii_only = */ false , '\'' ) . unwrap ( ) ;
850875 if !self . curr_is ( '\'' ) {
851876 let last_bpos = self . last_pos ;
852877 self . fatal_span_verbose (
@@ -876,44 +901,7 @@ impl<'a> StringReader<'a> {
876901 let mut c2 = self_. curr . unwrap_or ( '\x00' ) ;
877902 self_. bump ( ) ;
878903
879- match c2 {
880- '\\' => {
881- // '\X' for some X must be a character constant:
882- let escaped = self_. curr ;
883- let escaped_pos = self_. last_pos ;
884- self_. bump ( ) ;
885- match escaped {
886- None => { }
887- Some ( e) => {
888- c2 = match e {
889- 'n' => '\n' ,
890- 'r' => '\r' ,
891- 't' => '\t' ,
892- '\\' => '\\' ,
893- '\'' => '\'' ,
894- '"' => '"' ,
895- '0' => '\x00' ,
896- 'x' => self_. scan_numeric_escape ( 2 u, '\'' ) ,
897- c2 => {
898- self_. err_span_char (
899- escaped_pos, self_. last_pos ,
900- "unknown byte escape" , c2) ;
901- c2
902- }
903- }
904- }
905- }
906- }
907- '\t' | '\n' | '\r' | '\'' => {
908- self_. err_span_char ( start, self_. last_pos ,
909- "byte constant must be escaped" , c2) ;
910- }
911- _ => if c2 > '\x7F' {
912- self_. err_span_char ( start, self_. last_pos ,
913- "byte constant must be ASCII. \
914- Use a \\ xHH escape for a non-ASCII byte", c2) ;
915- }
916- }
904+ c2 = self_. scan_char_or_byte ( start, c2, /* ascii_only = */ true , '\'' ) . unwrap ( ) ;
917905 if !self_. curr_is ( '\'' ) {
918906 // Byte offsetting here is okay because the
919907 // character before position `start` are an
@@ -936,46 +924,11 @@ impl<'a> StringReader<'a> {
936924 "unterminated double quote byte string" ) ;
937925 }
938926
927+ let ch_start = self_. last_pos ;
939928 let ch = self_. curr . unwrap ( ) ;
940929 self_. bump ( ) ;
941- match ch {
942- '\\' => {
943- if self_. is_eof ( ) {
944- self_. fatal_span ( start, self_. last_pos ,
945- "unterminated double quote byte string" ) ;
946- }
947-
948- let escaped = self_. curr . unwrap ( ) ;
949- let escaped_pos = self_. last_pos ;
950- self_. bump ( ) ;
951- match escaped {
952- 'n' => value. push ( '\n' as u8 ) ,
953- 'r' => value. push ( '\r' as u8 ) ,
954- 't' => value. push ( '\t' as u8 ) ,
955- '\\' => value. push ( '\\' as u8 ) ,
956- '\'' => value. push ( '\'' as u8 ) ,
957- '"' => value. push ( '"' as u8 ) ,
958- '\n' => self_. consume_whitespace ( ) ,
959- '0' => value. push ( 0 ) ,
960- 'x' => {
961- value. push ( self_. scan_numeric_escape ( 2 u, '"' ) as u8 ) ;
962- }
963- c2 => {
964- self_. err_span_char ( escaped_pos, self_. last_pos ,
965- "unknown byte string escape" , c2) ;
966- }
967- }
968- }
969- _ => {
970- if ch <= '\x7F' {
971- value. push ( ch as u8 )
972- } else {
973- self_. err_span_char ( self_. last_pos , self_. last_pos ,
974- "byte string must be ASCII. \
975- Use a \\ xHH escape for a non-ASCII byte", ch) ;
976- }
977- }
978- }
930+ self_. scan_char_or_byte ( ch_start, ch, /* ascii_only = */ true , '"' )
931+ . map ( |ch| value. push ( ch as u8 ) ) ;
979932 }
980933 self_. bump ( ) ;
981934 return token:: LIT_BINARY ( Rc :: new ( value) ) ;
@@ -1039,46 +992,11 @@ impl<'a> StringReader<'a> {
1039992 self . fatal_span ( start_bpos, last_bpos, "unterminated double quote string" ) ;
1040993 }
1041994
995+ let ch_start = self . last_pos ;
1042996 let ch = self . curr . unwrap ( ) ;
1043997 self . bump ( ) ;
1044- match ch {
1045- '\\' => {
1046- if self . is_eof ( ) {
1047- let last_bpos = self . last_pos ;
1048- self . fatal_span ( start_bpos, last_bpos,
1049- "unterminated double quote string" ) ;
1050- }
1051-
1052- let escaped = self . curr . unwrap ( ) ;
1053- let escaped_pos = self . last_pos ;
1054- self . bump ( ) ;
1055- match escaped {
1056- 'n' => accum_str. push_char ( '\n' ) ,
1057- 'r' => accum_str. push_char ( '\r' ) ,
1058- 't' => accum_str. push_char ( '\t' ) ,
1059- '\\' => accum_str. push_char ( '\\' ) ,
1060- '\'' => accum_str. push_char ( '\'' ) ,
1061- '"' => accum_str. push_char ( '"' ) ,
1062- '\n' => self . consume_whitespace ( ) ,
1063- '0' => accum_str. push_char ( '\x00' ) ,
1064- 'x' => {
1065- accum_str. push_char ( self . scan_numeric_escape ( 2 u, '"' ) ) ;
1066- }
1067- 'u' => {
1068- accum_str. push_char ( self . scan_numeric_escape ( 4 u, '"' ) ) ;
1069- }
1070- 'U' => {
1071- accum_str. push_char ( self . scan_numeric_escape ( 8 u, '"' ) ) ;
1072- }
1073- c2 => {
1074- let last_bpos = self . last_pos ;
1075- self . err_span_char ( escaped_pos, last_bpos,
1076- "unknown string escape" , c2) ;
1077- }
1078- }
1079- }
1080- _ => accum_str. push_char ( ch)
1081- }
998+ self . scan_char_or_byte ( ch_start, ch, /* ascii_only = */ false , '"' )
999+ . map ( |ch| accum_str. push_char ( ch) ) ;
10821000 }
10831001 self . bump ( ) ;
10841002 return token:: LIT_STR ( str_to_ident ( accum_str. as_slice ( ) ) ) ;
0 commit comments