@@ -654,7 +654,8 @@ impl<'a> StringReader<'a> {
654654 // Note: r as in r" or r#" is part of a raw string literal,
655655 // b as in b' or b" is part of a byte or byte string literal.
656656 // They are not identifiers, and are handled further down.
657- ( 'r' , Some ( '"' ) ) | ( 'r' , Some ( '#' ) ) | ( 'b' , Some ( '\'' ) ) => false ,
657+ ( 'r' , Some ( '"' ) ) | ( 'r' , Some ( '#' ) ) |
658+ ( 'b' , Some ( '"' ) ) | ( 'b' , Some ( '\'' ) ) => false ,
658659 _ => true
659660 } {
660661 let start = self . last_pos ;
@@ -859,62 +860,124 @@ impl<'a> StringReader<'a> {
859860 }
860861 'b' => {
861862 self . bump ( ) ;
862- assert ! ( self . curr_is( '\'' ) , "Should have been a token::IDENT" ) ;
863- self . bump ( ) ;
864- let start = self . last_pos ;
865-
866- // the eof will be picked up by the final `'` check below
867- let mut c2 = self . curr . unwrap_or ( '\x00' ) ;
868- self . bump ( ) ;
863+ return match self . curr {
864+ Some ( '\'' ) => parse_byte ( self ) ,
865+ Some ( '"' ) => parse_byte_string ( self ) ,
866+ _ => unreachable ! ( ) // Should have been a token::IDENT above.
867+ } ;
869868
870- match c2 {
871- '\\' => {
872- // '\X' for some X must be a character constant:
873- let escaped = self . curr ;
874- let escaped_pos = self . last_pos ;
875- self . bump ( ) ;
876- match escaped {
877- None => { }
878- Some ( e) => {
879- c2 = match e {
880- 'n' => '\n' ,
881- 'r' => '\r' ,
882- 't' => '\t' ,
883- '\\' => '\\' ,
884- '\'' => '\'' ,
885- '"' => '"' ,
886- '0' => '\x00' ,
887- 'x' => self . scan_numeric_escape ( 2 u, '\'' ) ,
888- c2 => {
889- self . err_span_char ( escaped_pos, self . last_pos ,
890- "unknown byte escape" , c2) ;
891- c2
869+ fn parse_byte ( self_ : & mut StringReader ) -> token:: Token {
870+ self_. bump ( ) ;
871+ let start = self_. last_pos ;
872+
873+ // the eof will be picked up by the final `'` check below
874+ let mut c2 = self_. curr . unwrap_or ( '\x00' ) ;
875+ self_. bump ( ) ;
876+
877+ match c2 {
878+ '\\' => {
879+ // '\X' for some X must be a character constant:
880+ let escaped = self_. curr ;
881+ let escaped_pos = self_. last_pos ;
882+ self_. bump ( ) ;
883+ match escaped {
884+ None => { }
885+ Some ( e) => {
886+ c2 = match e {
887+ 'n' => '\n' ,
888+ 'r' => '\r' ,
889+ 't' => '\t' ,
890+ '\\' => '\\' ,
891+ '\'' => '\'' ,
892+ '"' => '"' ,
893+ '0' => '\x00' ,
894+ 'x' => self_. scan_numeric_escape ( 2 u, '\'' ) ,
895+ c2 => {
896+ self_. err_span_char (
897+ escaped_pos, self_. last_pos ,
898+ "unknown byte escape" , c2) ;
899+ c2
900+ }
892901 }
893902 }
894903 }
895904 }
905+ '\t' | '\n' | '\r' | '\'' => {
906+ self_. err_span_char ( start, self_. last_pos ,
907+ "byte constant must be escaped" , c2) ;
908+ }
909+ _ => if c2 > '\x7F' {
910+ self_. err_span_char ( start, self_. last_pos ,
911+ "byte constant must be ASCII. \
912+ Use a \\ xHH escape for a non-ASCII byte", c2) ;
913+ }
896914 }
897- '\t' | '\n' | '\r' | '\'' => {
898- self . err_span_char ( start, self . last_pos ,
899- "byte constant must be escaped" , c2) ;
900- }
901- _ if c2 > '\x7F' => {
902- self . err_span_char ( start, self . last_pos ,
903- "byte constant must be ASCII. \
904- Use a \\ xHH escape for a non-ASCII byte", c2) ;
915+ if !self_. curr_is ( '\'' ) {
916+ // Byte offsetting here is okay because the
917+ // character before position `start` are an
918+ // ascii single quote and ascii 'b'.
919+ self_. fatal_span_verbose (
920+ start - BytePos ( 2 ) , self_. last_pos ,
921+ "unterminated byte constant" . to_string ( ) ) ;
905922 }
906- _ => { }
923+ self_. bump ( ) ; // advance curr past token
924+ return token:: LIT_BYTE ( c2 as u8 ) ;
907925 }
908- if !self . curr_is ( '\'' ) {
909- self . fatal_span_verbose (
910- // Byte offsetting here is okay because the
911- // character before position `start` are an
912- // ascii single quote and ascii 'b'.
913- start - BytePos ( 2 ) , self . last_pos ,
914- "unterminated byte constant" . to_string ( ) ) ;
926+
927+ fn parse_byte_string ( self_ : & mut StringReader ) -> token:: Token {
928+ self_. bump ( ) ;
929+ let start = self_. last_pos ;
930+ let mut value = Vec :: new ( ) ;
931+ while !self_. curr_is ( '"' ) {
932+ if self_. is_eof ( ) {
933+ self_. fatal_span ( start, self_. last_pos ,
934+ "unterminated double quote byte string" ) ;
935+ }
936+
937+ let ch = self_. curr . unwrap ( ) ;
938+ self_. bump ( ) ;
939+ match ch {
940+ '\\' => {
941+ if self_. is_eof ( ) {
942+ self_. fatal_span ( start, self_. last_pos ,
943+ "unterminated double quote byte string" ) ;
944+ }
945+
946+ let escaped = self_. curr . unwrap ( ) ;
947+ let escaped_pos = self_. last_pos ;
948+ self_. bump ( ) ;
949+ match escaped {
950+ 'n' => value. push ( '\n' as u8 ) ,
951+ 'r' => value. push ( '\r' as u8 ) ,
952+ 't' => value. push ( '\t' as u8 ) ,
953+ '\\' => value. push ( '\\' as u8 ) ,
954+ '\'' => value. push ( '\'' as u8 ) ,
955+ '"' => value. push ( '"' as u8 ) ,
956+ '\n' => self_. consume_whitespace ( ) ,
957+ '0' => value. push ( 0 ) ,
958+ 'x' => {
959+ value. push ( self_. scan_numeric_escape ( 2 u, '"' ) as u8 ) ;
960+ }
961+ c2 => {
962+ self_. err_span_char ( escaped_pos, self_. last_pos ,
963+ "unknown byte string escape" , c2) ;
964+ }
965+ }
966+ }
967+ _ => {
968+ if ch <= '\x7F' {
969+ value. push ( ch as u8 )
970+ } else {
971+ self_. err_span_char ( self_. last_pos , self_. last_pos ,
972+ "byte string must be ASCII. \
973+ Use a \\ xHH escape for a non-ASCII byte", ch) ;
974+ }
975+ }
976+ }
977+ }
978+ self_. bump ( ) ;
979+ return token:: LIT_BINARY ( Rc :: new ( value) ) ;
915980 }
916- self . bump ( ) ; // advance curr past token
917- return token:: LIT_BYTE ( c2 as u8 ) ;
918981 }
919982 '"' => {
920983 let mut accum_str = String :: new ( ) ;
0 commit comments