@@ -64,6 +64,8 @@ pub enum TokenKind {
6464 /// "ident" or "continue"
6565 /// At this step keywords are also considered identifiers.
6666 Ident ,
67+ /// Like the above, but containing invalid unicode codepoints.
68+ InvalidIdent ,
6769 /// "r#ident"
6870 RawIdent ,
6971 /// An unknown prefix like `foo#`, `foo'`, `foo"`. Note that only the
@@ -411,6 +413,11 @@ impl Cursor<'_> {
411413 let kind = Str { terminated } ;
412414 Literal { kind, suffix_start }
413415 }
416+ // Identifier (this should be checked after other variants that can
417+ // start as an identifier).
418+ c if !c. is_ascii ( ) && unic_emoji_char:: is_emoji ( c) => {
419+ self . fake_ident_or_unknown_prefix ( )
420+ }
414421 _ => Unknown ,
415422 } ;
416423 Token :: new ( token_kind, self . len_consumed ( ) )
@@ -492,10 +499,28 @@ impl Cursor<'_> {
492499 // we see a prefix here, it is definitely an unknown prefix.
493500 match self . first ( ) {
494501 '#' | '"' | '\'' => UnknownPrefix ,
502+ c if !c. is_ascii ( ) && unic_emoji_char:: is_emoji ( c) => {
503+ self . fake_ident_or_unknown_prefix ( )
504+ }
495505 _ => Ident ,
496506 }
497507 }
498508
509+ fn fake_ident_or_unknown_prefix ( & mut self ) -> TokenKind {
510+ // Start is already eaten, eat the rest of identifier.
511+ self . eat_while ( |c| {
512+ unicode_xid:: UnicodeXID :: is_xid_continue ( c)
513+ || ( !c. is_ascii ( ) && unic_emoji_char:: is_emoji ( c) )
514+ || c == '\u{200d}'
515+ } ) ;
516+ // Known prefixes must have been handled earlier. So if
517+ // we see a prefix here, it is definitely an unknown prefix.
518+ match self . first ( ) {
519+ '#' | '"' | '\'' => UnknownPrefix ,
520+ _ => InvalidIdent ,
521+ }
522+ }
523+
499524 fn number ( & mut self , first_digit : char ) -> LiteralKind {
500525 debug_assert ! ( '0' <= self . prev( ) && self . prev( ) <= '9' ) ;
501526 let mut base = Base :: Decimal ;
0 commit comments