@@ -900,6 +900,119 @@ extension Parser {
900900 }
901901}
902902
903+ extension TokenConsumer {
904+ /// Determines whether the word at the cursor looks like the start of a keyword-prefixed syntax, judging by the token that follows it.
905+ ///
906+ /// - Parameters:
907+ /// - exprFlavor: The expression context. When using this function for a statement, e.g. 'yield',
908+ /// use `.basic`. Only consulted for the trailing-closure check when the next token is '{'.
909+ /// - acceptClosure: When the next token is '{' and `atValidTrailingClosure` accepts what follows, this value is the result; otherwise the result is `false`.
910+ /// - preferPostfixExpr: When the next token is '.', '(', or '[' and there is trivia between the word and that token,
911+ /// `!preferPostfixExpr` is the result. With no trivia in between, the result is always `false`.
912+ mutating func atContextualKeywordPrefixedSyntax(
913+ exprFlavor: Parser . ExprFlavor ,
914+ acceptClosure: Bool = false ,
915+ preferPostfixExpr: Bool = true
916+ ) -> Bool {
917+ let next = peek ( )
918+
919+ // The next token must be on the same line as the word.
920+ if next. isAtStartOfLine {
921+ return false
922+ }
923+
924+ switch next. rawTokenKind {
925+
926+ case . identifier, . dollarIdentifier, . wildcard:
927+ // E.g. <word> foo
928+ return true
929+
930+ case . integerLiteral, . floatLiteral,
931+ . stringQuote, . multilineStringQuote, . singleQuote, . rawStringPoundDelimiter,
932+ . regexSlash, . regexPoundDelimiter:
933+ // E.g. <word> 1
934+ return true
935+
936+ case . prefixAmpersand, . prefixOperator, . atSign, . backslash, . pound:
937+ // E.g. <word> !<expr>
938+ return true
939+
940+ case . keyword:
941+ switch Keyword ( next. tokenText) {
942+ case . as, . is, . in:
943+ // E.g. <word> is <expr>
944+ return false
945+ default :
946+ // Other lexer-classified keywords are identifier-like.
947+ // E.g. <word> self
948+ return true
949+ }
950+
951+ case . binaryOperator, . equal, . arrow, . infixQuestionMark:
952+ // E.g. <word> != <expr>
953+ return false
954+ case . postfixOperator, . postfixQuestionMark, . exclamationMark, . ellipsis:
955+ // E.g. <word>++
956+ return false
957+ case . rightBrace, . rightParen, . rightSquare:
958+ // E.g. <word>]
959+ return false
960+ case . colon, . comma:
961+ // E.g. <word>,
962+ return false
963+ case . semicolon, . endOfFile, . poundElse, . poundElseif, . poundEndif:
964+ return false
965+
966+ case . leftAngle, . rightAngle:
967+ // The lexer never produces these token kinds.
968+ return false
969+
970+ case . stringSegment, . regexLiteralPattern:
971+ // Calling this function inside a string/regex literal?
972+ return false
973+
974+ case . backtick, . poundAvailable, . poundUnavailable,
975+ . poundSourceLocation, . poundIf, . shebang, . unknown:
976+ // These tokens are invalid after the word under either interpretation.
977+ // E.g. <word> #available
978+ return false
979+
980+ case . period, . leftParen, . leftSquare:
981+ // These are truly ambiguous. They can be either the start of a postfix
982+ // expression suffix or the start of a primary expression:
983+ //
984+ // - Member access vs. implicit member expression
985+ // - Call vs. tuple expression
986+ // - Subscript vs. collection literal
987+ //
988+ let hasSpace = ( next. leadingTriviaByteLength + currentToken. trailingTriviaByteLength) != 0
989+ if !hasSpace {
990+ // No trivia between the two tokens; the word is a decl-ref expression.
991+ return false
992+ }
993+ return !preferPostfixExpr
994+
995+ case . leftBrace:
996+ // E.g. <word> { ... }
997+ // Trailing closure is also ambiguous:
998+ //
999+ // - Trailing closure vs. immediately-invoked closure
1000+ //
1001+ // Checking for whitespace after the word cannot help here because people
1002+ // usually put a space before trailing closures. Even though it is source
1003+ // breaking, we prefer parsing it as a keyword if the syntax accepts
1004+ // immediately-invoked closure patterns. E.g. 'unsafe { ... }()'
1005+ if !acceptClosure {
1006+ return false
1007+ }
1008+ return self . withLookahead {
1009+ $0. consumeAnyToken ( )
1010+ return $0. atValidTrailingClosure ( flavor: exprFlavor)
1011+ }
1012+ }
1013+ }
1014+ }
1015+
9031016// MARK: Lookahead
9041017
9051018extension Parser . Lookahead {
@@ -949,91 +1062,16 @@ extension Parser.Lookahead {
9491062 // FIXME: 'repeat' followed by '{' could be a pack expansion
9501063 // with a closure pattern.
9511064 return self . peek ( ) . rawTokenKind == . leftBrace
952- case . yield? :
953- switch self . peek ( ) . rawTokenKind {
954- case . prefixAmpersand:
955- // "yield &" always denotes a yield statement.
956- return true
957- case . leftParen:
958- // "yield (", by contrast, must be disambiguated with additional
959- // context. We always consider it an apply expression of a function
960- // called `yield` for the purposes of the parse.
961- return false
962- case . binaryOperator:
963- // 'yield &= x' treats yield as an identifier.
964- return false
965- default :
966- // "yield" followed immediately by any other token is likely a
967- // yield statement of some singular expression.
968- return !self . peek ( ) . isAtStartOfLine
969- }
970- case . discard? :
971- let next = peek ( )
972- // The thing to be discarded must be on the same line as `discard`.
973- if next. isAtStartOfLine {
974- return false
975- }
976- switch next. rawTokenKind {
977- case . identifier, . keyword:
978- // Since some identifiers like "self" are classified as keywords,
979- // we want to recognize those too, to handle "discard self". We also
980- // accept any identifier since we want to emit a nice error message
981- // later on during type checking.
982- return true
983- default :
984- // any other token following "discard" means it's not the statement.
985- // For example, could be the function call "discard()".
986- return false
987- }
988-
989- case . then:
990- return atStartOfThenStatement ( preferExpr: preferExpr)
1065+ case . yield? , . discard? :
1066+ return atContextualKeywordPrefixedSyntax ( exprFlavor: . basic, preferPostfixExpr: true )
1067+ case . then? :
1068+ return atContextualKeywordPrefixedSyntax ( exprFlavor: . basic, preferPostfixExpr: false )
9911069
9921070 case nil :
9931071 return false
9941072 }
9951073 }
9961074
997- /// Whether we're currently at a `then` token that should be parsed as a
998- /// `then` statement.
999- mutating func atStartOfThenStatement( preferExpr: Bool ) -> Bool {
1000- guard self . at ( . keyword( . then) ) else {
1001- return false
1002- }
1003-
1004- // If we prefer an expr and aren't at the start of a newline, then don't
1005- // parse a ThenStmt.
1006- if preferExpr && !self . atStartOfLine {
1007- return false
1008- }
1009-
1010- // If 'then' is followed by a binary or postfix operator, prefer to parse as
1011- // an expr.
1012- if peek ( isAtAnyIn: BinaryOperatorLike . self) != nil || peek ( isAtAnyIn: PostfixOperatorLike . self) != nil {
1013- return false
1014- }
1015-
1016- switch PrepareForKeywordMatch ( peek ( ) ) {
1017- case TokenSpec ( . is) , TokenSpec ( . as) :
1018- // Treat 'is' and 'as' like the binary operator case, and parse as an
1019- // expr.
1020- return false
1021-
1022- case . leftBrace:
1023- // This is a trailing closure.
1024- return false
1025-
1026- case . leftParen, . leftSquare, . period:
1027- // These are handled based on whether there is trivia between the 'then'
1028- // and the token. If so, it's a 'then' statement. Otherwise it should
1029- // be treated as an expression, e.g `then(...)`, `then[...]`, `then.foo`.
1030- return !self . currentToken. trailingTriviaText. isEmpty || !peek( ) . leadingTriviaText. isEmpty
1031- default :
1032- break
1033- }
1034- return true
1035- }
1036-
10371075 /// Returns whether the parser's current position is the start of a switch case,
10381076 /// given that we're in the middle of a switch already.
10391077 mutating func atStartOfSwitchCase( allowRecovery: Bool = false ) -> Bool {
0 commit comments