@@ -230,7 +230,7 @@ pub fn stream_to_parser(sess: &ParseSess, stream: TokenStream) -> Parser {
230230/// Rather than just accepting/rejecting a given literal, unescapes it as
231231/// well. Can take any slice prefixed by a character escape. Returns the
232232/// character and the number of characters consumed.
233- pub fn char_lit ( lit : & str ) -> ( char , isize ) {
233+ pub fn char_lit ( lit : & str , diag : Option < ( Span , & Handler ) > ) -> ( char , isize ) {
234234 use std:: char;
235235
236236 // Handle non-escaped chars first.
@@ -258,8 +258,19 @@ pub fn char_lit(lit: &str) -> (char, isize) {
258258 'u' => {
259259 assert_eq ! ( lit. as_bytes( ) [ 2 ] , b'{' ) ;
260260 let idx = lit. find ( '}' ) . unwrap ( ) ;
261- let v = u32:: from_str_radix ( & lit[ 3 ..idx] , 16 ) . unwrap ( ) ;
262- let c = char:: from_u32 ( v) . unwrap ( ) ;
261+ let s = & lit[ 3 ..idx] . chars ( ) . filter ( |& c| c != '_' ) . collect :: < String > ( ) ;
262+ let v = u32:: from_str_radix ( & s, 16 ) . unwrap ( ) ;
263+ let c = char:: from_u32 ( v) . unwrap_or_else ( || {
264+ if let Some ( ( span, diag) ) = diag {
265+ let mut diag = diag. struct_span_err ( span, "invalid unicode character escape" ) ;
266+ if v > 0x10FFFF {
267+ diag. help ( "unicode escape must be at most 10FFFF" ) . emit ( ) ;
268+ } else {
269+ diag. help ( "unicode escape must not be a surrogate" ) . emit ( ) ;
270+ }
271+ }
272+ '\u{FFFD}'
273+ } ) ;
263274 ( c, ( idx + 1 ) as isize )
264275 }
265276 _ => panic ! ( "lexer should have rejected a bad character escape {}" , lit)
@@ -272,7 +283,7 @@ pub fn escape_default(s: &str) -> String {
272283
273284/// Parse a string representing a string literal into its final form. Does
274285/// unescaping.
275- pub fn str_lit ( lit : & str ) -> String {
286+ pub fn str_lit ( lit : & str , diag : Option < ( Span , & Handler ) > ) -> String {
276287 debug ! ( "parse_str_lit: given {}" , escape_default( lit) ) ;
277288 let mut res = String :: with_capacity ( lit. len ( ) ) ;
278289
@@ -313,7 +324,7 @@ pub fn str_lit(lit: &str) -> String {
313324 eat ( & mut chars) ;
314325 } else {
315326 // otherwise, a normal escape
316- let ( c, n) = char_lit ( & lit[ i..] ) ;
327+ let ( c, n) = char_lit ( & lit[ i..] , diag ) ;
317328 for _ in 0 ..n - 1 { // we don't need to move past the first \
318329 chars. next ( ) ;
319330 }
@@ -385,15 +396,15 @@ pub fn lit_token(lit: token::Lit, suf: Option<Symbol>, diag: Option<(Span, &Hand
385396
386397 match lit {
387398 token:: Byte ( i) => ( true , Some ( LitKind :: Byte ( byte_lit ( & i. as_str ( ) ) . 0 ) ) ) ,
388- token:: Char ( i) => ( true , Some ( LitKind :: Char ( char_lit ( & i. as_str ( ) ) . 0 ) ) ) ,
399+ token:: Char ( i) => ( true , Some ( LitKind :: Char ( char_lit ( & i. as_str ( ) , diag ) . 0 ) ) ) ,
389400
390401 // There are some valid suffixes for integer and float literals,
391402 // so all the handling is done internally.
392403 token:: Integer ( s) => ( false , integer_lit ( & s. as_str ( ) , suf, diag) ) ,
393404 token:: Float ( s) => ( false , float_lit ( & s. as_str ( ) , suf, diag) ) ,
394405
395406 token:: Str_ ( s) => {
396- let s = Symbol :: intern ( & str_lit ( & s. as_str ( ) ) ) ;
407+ let s = Symbol :: intern ( & str_lit ( & s. as_str ( ) , diag ) ) ;
397408 ( true , Some ( LitKind :: Str ( s, ast:: StrStyle :: Cooked ) ) )
398409 }
399410 token:: StrRaw ( s, n) => {
0 commit comments