@@ -900,6 +900,128 @@ extension Parser {
900900 }
901901}
902902
extension TokenConsumer {
  /// Decides whether the word at the cursor should be treated as the keyword of a
  /// keyword-prefixed syntax (e.g. `<word> <expr>`) instead of as an ordinary identifier.
  ///
  /// Only the token following the word is inspected, plus the trivia between the two tokens
  /// when that token is '.', '(' or '['.
  ///
  /// - Parameters:
  ///   - exprFlavor: The expression context; it is passed to `atValidTrailingClosure(flavor:)`
  ///     when the next token is '{'. When using this function for a statement, e.g. 'yield',
  ///     use `.basic`.
  ///   - acceptClosure: When the next token is '{' and it looks like a closure, this value is
  ///     used as the result.
  ///   - preferPostfixExpr: When the next token is '.', '(', or '[' and there is a space between
  ///     it and the word, `!preferPostfixExpr` is used as the result.
  ///   - allowNextLineOperand: Whether the keyword-prefixed syntax accepts the operand on the
  ///     next line.
  /// - Returns: `true` if the word looks like the keyword of a keyword-prefixed syntax.
  mutating func atContextualKeywordPrefixedSyntax(
    exprFlavor: Parser.ExprFlavor,
    acceptClosure: Bool = false,
    preferPostfixExpr: Bool = true,
    allowNextLineOperand: Bool = false
  ) -> Bool {
    let following = peek()

    // Unless the caller opted in, the operand has to be on the same line as the word.
    guard allowNextLineOperand || !following.isAtStartOfLine else {
      return false
    }

    switch following.rawTokenKind {
    // Tokens that can only begin an operand:
    //  - names, e.g. `<word> foo`
    //  - literals, e.g. `<word> 1`
    //  - prefix-position punctuation, e.g. `<word> !<expr>`
    case .identifier, .dollarIdentifier, .wildcard,
      .integerLiteral, .floatLiteral,
      .stringQuote, .multilineStringQuote, .singleQuote, .rawStringPoundDelimiter,
      .regexSlash, .regexPoundDelimiter,
      .prefixAmpersand, .prefixOperator, .atSign, .backslash, .pound:
      return true

    case .keyword:
      // Only some of the lexer-classified keywords can start an expression.
      switch Keyword(following.tokenText) {
      case .Any, .Self, .self, .super, .`init`, .true, .false, .nil,
        .repeat, .try,
        .if, .switch:
        return true
      case .do:
        // `do` starts an expression only when the experimental feature is enabled.
        return self.experimentalFeatures.contains(.doExpressions)
      default:
        return false
      }

    // Tokens after which the word has to be an ordinary identifier:
    //  - infix position, e.g. `<word> != <expr>`
    //  - postfix position, e.g. `<word>++`
    //  - closing delimiters, e.g. `<word>]`
    //  - separators and terminators, e.g. `<word>,`
    //  - `.leftAngle` / `.rightAngle`: the lexer never produces these token kinds
    //  - `.stringSegment` / `.regexLiteralPattern`: only expected inside a string/regex literal
    //  - the remaining kinds are invalid in both interpretations, e.g. `<word> #available`
    case .binaryOperator, .equal, .arrow, .infixQuestionMark,
      .postfixOperator, .postfixQuestionMark, .exclamationMark, .ellipsis,
      .rightBrace, .rightParen, .rightSquare,
      .colon, .comma,
      .semicolon, .endOfFile, .poundElse, .poundElseif, .poundEndif,
      .leftAngle, .rightAngle,
      .stringSegment, .regexLiteralPattern,
      .backtick, .poundAvailable, .poundUnavailable,
      .poundSourceLocation, .poundIf, .shebang, .unknown:
      return false

    case .period, .leftParen, .leftSquare:
      // Truly ambiguous: each of these can continue a postfix expression or begin a
      // primary expression.
      //
      // - Member access vs. implicit member expression
      // - Call vs. tuple expression
      // - Subscript vs. collection literal
      guard !preferPostfixExpr else {
        return false
      }

      // Tokens that touch each other read as one postfix expression; any trivia in between
      // makes it look like a keyword followed by an expression.
      let triviaBetweenTokens = following.leadingTriviaByteLength + currentToken.trailingTriviaByteLength
      return triviaBetweenTokens != 0

    case .leftBrace:
      // E.g. `<word> { ... }`, which is ambiguous as well:
      //
      // - Trailing closure vs. immediately-invoked closure
      guard acceptClosure else {
        return false
      }

      // Whitespace is no help here because a space is usually written before a trailing
      // closure. Even though it is source breaking, parsing as a keyword is preferred when
      // the syntax accepts expressions starting with a closure. E.g. 'unsafe { ... }()'
      return self.withLookahead { lookahead in
        lookahead.consumeAnyToken()
        return lookahead.atValidTrailingClosure(flavor: exprFlavor)
      }
    }
  }
}
1024+
9031025// MARK: Lookahead
9041026
9051027extension Parser . Lookahead {
@@ -949,91 +1071,23 @@ extension Parser.Lookahead {
9491071 // FIXME: 'repeat' followed by '{' could be a pack expansion
9501072 // with a closure pattern.
9511073 return self . peek ( ) . rawTokenKind == . leftBrace
952- case . yield? :
953- switch self . peek ( ) . rawTokenKind {
954- case . prefixAmpersand:
955- // "yield &" always denotes a yield statement.
956- return true
957- case . leftParen:
958- // "yield (", by contrast, must be disambiguated with additional
959- // context. We always consider it an apply expression of a function
960- // called `yield` for the purposes of the parse.
961- return false
962- case . binaryOperator:
963- // 'yield &= x' treats yield as an identifier.
964- return false
965- default :
966- // "yield" followed immediately by any other token is likely a
967- // yield statement of some singular expression.
968- return !self . peek ( ) . isAtStartOfLine
969- }
970- case . discard? :
971- let next = peek ( )
972- // The thing to be discarded must be on the same line as `discard`.
973- if next. isAtStartOfLine {
974- return false
975- }
976- switch next. rawTokenKind {
977- case . identifier, . keyword:
978- // Since some identifiers like "self" are classified as keywords,
979- // we want to recognize those too, to handle "discard self". We also
980- // accept any identifier since we want to emit a nice error message
981- // later on during type checking.
982- return true
983- default :
984- // any other token following "discard" means it's not the statement.
985- // For example, could be the function call "discard()".
986- return false
987- }
988-
989- case . then:
990- return atStartOfThenStatement ( preferExpr: preferExpr)
1074+ case . yield? , . discard? :
1075+ return atContextualKeywordPrefixedSyntax (
1076+ exprFlavor: . basic,
1077+ preferPostfixExpr: true
1078+ )
1079+ case . then? :
1080+ return atContextualKeywordPrefixedSyntax (
1081+ exprFlavor: . basic,
1082+ preferPostfixExpr: false ,
1083+ allowNextLineOperand: !preferExpr
1084+ )
9911085
9921086 case nil :
9931087 return false
9941088 }
9951089 }
9961090
/// Determines whether the current `then` token begins a `then` statement rather than an
/// expression that uses `then` as an ordinary name.
///
/// - Parameter preferExpr: When `true`, a `then` that is not at the start of a line is never
///   treated as a statement.
/// - Returns: `true` if the current token should be parsed as the start of a `then` statement.
mutating func atStartOfThenStatement(preferExpr: Bool) -> Bool {
  // Nothing to decide unless we are looking at the `then` keyword.
  guard self.at(.keyword(.then)) else {
    return false
  }

  // When an expression is preferred, only a `then` at the start of a line may be a ThenStmt.
  if preferExpr {
    guard self.atStartOfLine else {
      return false
    }
  }

  // A following binary or postfix operator means `then` is an operand of an expression.
  guard peek(isAtAnyIn: BinaryOperatorLike.self) == nil,
    peek(isAtAnyIn: PostfixOperatorLike.self) == nil
  else {
    return false
  }

  switch PrepareForKeywordMatch(peek()) {
  case TokenSpec(.is), TokenSpec(.as), .leftBrace:
    // 'is' and 'as' are treated like binary operators, and '{' starts a trailing closure;
    // in all of these cases parse as an expr.
    return false

  case .leftParen, .leftSquare, .period:
    // Decided by the trivia between 'then' and the token: with trivia it is a 'then'
    // statement, without it is an expression, e.g. `then(...)`, `then[...]`, `then.foo`.
    let tokensAreAdjacent = self.currentToken.trailingTriviaText.isEmpty && peek().leadingTriviaText.isEmpty
    return !tokensAreAdjacent

  default:
    return true
  }
}
10371091 /// Returns whether the parser's current position is the start of a switch case,
10381092 /// given that we're in the middle of a switch already.
10391093 mutating func atStartOfSwitchCase( allowRecovery: Bool = false ) -> Bool {
0 commit comments