swiftlang
diff --git a/include/swift/AST/DiagnosticEngine.h b/include/swift/AST/DiagnosticEngine.h
Lines changed: 1 addition & 1 deletion
diff --git a/include/swift/Parse/Lexer.h b/include/swift/Parse/Lexer.h
Lines changed: 18 additions & 2 deletions
diff --git a/include/swift/Parse/Parser.h b/include/swift/Parse/Parser.h
Lines changed: 3 additions & 10 deletions
diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp
Lines changed: 116 additions & 39 deletions
@@ -1390,7 +1390,7 @@ namespace swift {
     DiagnosticEngine &getDiags() { return QueueEngine; }
 
     /// Retrieve the underlying engine which will receive the diagnostics.
-    DiagnosticEngine &getUnderlyingDiags() { return UnderlyingEngine; }
+    DiagnosticEngine &getUnderlyingDiags() const { return UnderlyingEngine; }
 
     /// Clear this queue and erase all diagnostics recorded.
     void clear() {
 
@@ -180,7 +180,7 @@ class Lexer {
   /// Retrieve the underlying diagnostic engine we emit diagnostics to. Note
   /// this should only be used for diagnostics not concerned with the current
   /// token.
-  DiagnosticEngine *getUnderlyingDiags() {
+  DiagnosticEngine *getUnderlyingDiags() const {
     return DiagQueue ? &DiagQueue->getUnderlyingDiags() : nullptr;
   }
 
@@ -218,7 +218,10 @@ class Lexer {
   /// \param Parent the parent lexer that scans the whole buffer
   /// \param BeginState start of the subrange
   /// \param EndState end of the subrange
-  Lexer(Lexer &Parent, State BeginState, State EndState);
+  /// \param EnableDiagnostics Whether to inherit the diagnostic engine of
+  /// \p Parent. If \c false, diagnostics will be disabled.
+  Lexer(const Lexer &Parent, State BeginState, State EndState,
+        bool EnableDiagnostics = true);
 
   /// Returns true if this lexer will produce a code completion token.
   bool isCodeCompletion() const {
@@ -577,6 +580,13 @@ class Lexer {
                             : LexerForwardSlashRegexMode::Tentative) {}
   };
 
+  /// Checks whether a given token could potentially contain the start of an
+  /// unskippable `/.../` regex literal. Such tokens need to go through the
+  /// parser, as they may become regex literal tokens. This includes operator
+  /// tokens such as `!/` which could be split into prefix `!` on a regex
+  /// literal.
+  bool isPotentialUnskippableBareSlashRegexLiteral(const Token &Tok) const;
+
 private:
   /// Nul character meaning kind.
   enum class NulCharacterKind {
@@ -641,6 +651,12 @@ class Lexer {
   void lexStringLiteral(unsigned CustomDelimiterLen = 0);
   void lexEscapedIdentifier();
 
+  /// Attempt to scan a regex literal, returning the end pointer, or `nullptr`
+  /// if a regex literal cannot be scanned.
+  const char *tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
+                                  DiagnosticEngine *Diags,
+                                  bool &CompletelyErroneous) const;
+
   /// Attempt to lex a regex literal, returning true if lexing should continue,
   /// false if this is not a regex literal.
   bool tryLexRegexLiteral(const char *TokStart);
 
@@ -718,13 +718,6 @@ class Parser {
   /// plain Tok.is(T1) check).
   bool skipUntilTokenOrEndOfLine(tok T1, tok T2 = tok::NUM_TOKENS);
 
-  /// Skip a braced block (e.g. function body). The current token must be '{'.
-  /// Returns \c true if the parser hit the eof before finding matched '}'.
-  ///
-  /// Set \c HasNestedTypeDeclarations to true if a token for a type
-  /// declaration is detected in the skipped block.
-  bool skipBracedBlock(bool &HasNestedTypeDeclarations);
-
   /// Skip over SIL decls until we encounter the start of a Swift decl or eof.
   void skipSILUntilSwiftDecl();
 
@@ -1001,6 +994,8 @@ class Parser {
   bool canDelayMemberDeclParsing(bool &HasOperatorDeclarations,
                                  bool &HasNestedClassDeclarations);
 
+  bool canDelayFunctionBodyParsing(bool &HasNestedTypeDeclarations);
+
   bool delayParsingDeclList(SourceLoc LBLoc, SourceLoc &RBLoc,
                             IterableDeclContext *IDC);
 
@@ -1211,9 +1206,7 @@ class Parser {
                                        bool &hasEffectfulGet,
                                        AccessorKind currentKind,
                                        SourceLoc const& currentLoc);
-  
-  void consumeAbstractFunctionBody(AbstractFunctionDecl *AFD,
-                                   const DeclAttributes &Attrs);
+
   ParserResult<FuncDecl> parseDeclFunc(SourceLoc StaticLoc,
                                        StaticSpellingKind StaticSpelling,
                                        ParseDeclOptions Flags,
 
@@ -246,9 +246,11 @@ Lexer::Lexer(const LangOptions &Options, const SourceManager &SourceMgr,
   initialize(Offset, EndOffset);
 }
 
-Lexer::Lexer(Lexer &Parent, State BeginState, State EndState)
+Lexer::Lexer(const Lexer &Parent, State BeginState, State EndState,
+             bool EnableDiagnostics)
     : Lexer(PrincipalTag(), Parent.LangOpts, Parent.SourceMgr, Parent.BufferID,
-            Parent.getUnderlyingDiags(), Parent.LexMode,
+            EnableDiagnostics ? Parent.getUnderlyingDiags() : nullptr,
+            Parent.LexMode,
             Parent.IsHashbangAllowed
                 ? HashbangMode::Allowed
                 : HashbangMode::Disallowed,
@@ -1978,27 +1980,76 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
   }
 }
 
-bool Lexer::tryLexRegexLiteral(const char *TokStart) {
+bool Lexer::isPotentialUnskippableBareSlashRegexLiteral(const Token &Tok) const {
+  if (!LangOpts.hasFeature(Feature::BareSlashRegexLiterals))
+    return false;
+
+  // A `/.../` regex literal may only start on a binary or prefix operator.
+  if (Tok.isNot(tok::oper_prefix, tok::oper_binary_spaced,
+                tok::oper_binary_unspaced)) {
+    return false;
+  }
+  auto SlashIdx = Tok.getText().find("/");
+  if (SlashIdx == StringRef::npos)
+    return false;
+
+  auto Offset = getBufferPtrForSourceLoc(Tok.getLoc()) + SlashIdx;
+  bool CompletelyErroneous;
+  if (tryScanRegexLiteral(Offset, /*MustBeRegex*/ false, /*Diags*/ nullptr,
+                          CompletelyErroneous)) {
+    // Definitely a regex literal.
+    return true;
+  }
+
+  // A prefix '/' can never be a regex literal if it failed a heuristic.
+  if (Tok.is(tok::oper_prefix))
+    return false;
+
+  // We either don't have a regex literal, or we failed a heuristic. We now need
+  // to make sure we don't have an unbalanced `{` or `}`, as that would have the
+  // potential to change the range of a skipped body if we try to more
+  // aggressively lex a regex literal during normal parsing. If we have balanced
+  // `{` + `}`, we can proceed with skipping. Worst case scenario is we emit a
+  // worse diagnostic.
+  // FIXME: We ought to silence lexer diagnostics when skipping, this would
+  // avoid emitting a worse diagnostic.
+  auto *EndPtr = tryScanRegexLiteral(Offset, /*MustBeRegex*/ true,
+                                     /*Diags*/ nullptr, CompletelyErroneous);
+  if (!EndPtr)
+    return false;
+
+  Lexer L(*this, State(Tok.getLoc().getAdvancedLoc(Tok.getLength())),
+          State(getSourceLoc(EndPtr)), /*EnableDiagnostics*/ false);
+
+  unsigned OpenBraces = 0;
+  while (L.peekNextToken().isNot(tok::eof)) {
+    Token Tok;
+    L.lex(Tok);
+    if (Tok.is(tok::l_brace))
+      OpenBraces += 1;
+    if (Tok.is(tok::r_brace)) {
+      if (OpenBraces == 0)
+        return true;
+      OpenBraces -= 1;
+    }
+  }
+
+  // If we have an unbalanced `{`, this is unskippable.
+  return OpenBraces != 0;
+}
+
+const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
+                                       DiagnosticEngine *Diags,
+                                       bool &CompletelyErroneous) const {
   // We need to have experimental string processing enabled, and have the
   // parsing logic for regex literals available.
   if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
-    return false;
+    return nullptr;
 
-  bool MustBeRegex = true;
   bool IsForwardSlash = (*TokStart == '/');
 
   // Check if we're able to lex a `/.../` regex.
   if (IsForwardSlash) {
-    switch (ForwardSlashRegexMode) {
-    case LexerForwardSlashRegexMode::None:
-      return false;
-    case LexerForwardSlashRegexMode::Tentative:
-      MustBeRegex = false;
-      break;
-    case LexerForwardSlashRegexMode::Always:
-      break;
-    }
-
     // For `/.../` regex literals, we need to ban space and tab at the start of
     // a regex to avoid ambiguity with operator chains, e.g:
     //
@@ -2016,23 +2067,25 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
     case ' ':
     case '\t': {
       if (!MustBeRegex)
-        return false;
+        return nullptr;
 
-      // We must have a regex, so emit an error for space and tab.
-      StringRef DiagChar;
-      switch (*RegexContentStart) {
-      case ' ':
-        DiagChar = "space";
-        break;
-      case '\t':
-        DiagChar = "tab";
-        break;
-      default:
-        llvm_unreachable("Unhandled case");
+      if (Diags) {
+        // We must have a regex, so emit an error for space and tab.
+        StringRef DiagChar;
+        switch (*RegexContentStart) {
+        case ' ':
+          DiagChar = "space";
+          break;
+        case '\t':
+          DiagChar = "tab";
+          break;
+        default:
+          llvm_unreachable("Unhandled case");
+        }
+        Diags->diagnose(getSourceLoc(RegexContentStart),
+                        diag::lex_regex_literal_invalid_starting_char, DiagChar)
+            .fixItInsert(getSourceLoc(RegexContentStart), "\\");
       }
-      diagnose(RegexContentStart, diag::lex_regex_literal_invalid_starting_char,
-               DiagChar)
-          .fixItInsert(getSourceLoc(RegexContentStart), "\\");
       break;
     }
     default:
@@ -2045,25 +2098,26 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
   // - CompletelyErroneous will be set if there was an error that cannot be
   //   recovered from.
   auto *Ptr = TokStart;
-  bool CompletelyErroneous = regexLiteralLexingFn(
-      &Ptr, BufferEnd, MustBeRegex,
-      getBridgedOptionalDiagnosticEngine(getTokenDiags()));
+  CompletelyErroneous = regexLiteralLexingFn(
+      &Ptr, BufferEnd, MustBeRegex, getBridgedOptionalDiagnosticEngine(Diags));
 
   // If we didn't make any lexing progress, this isn't a regex literal and we
   // should fallback to lexing as something else.
   if (Ptr == TokStart)
-    return false;
+    return nullptr;
 
   // If we're lexing `/.../`, error if we ended on the opening of a comment.
   // We prefer to lex the comment as it's more likely than not that is what
   // the user is expecting.
   // TODO: This should be sunk into the Swift library.
   if (IsForwardSlash && Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
     if (!MustBeRegex)
-      return false;
-
-    diagnose(TokStart, diag::lex_regex_literal_unterminated);
+      return nullptr;
 
+    if (Diags) {
+      Diags->diagnose(getSourceLoc(TokStart),
+                      diag::lex_regex_literal_unterminated);
+    }
     // Move the pointer back to the '/' of the comment.
     Ptr--;
   }
@@ -2096,7 +2150,7 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
 
         // Invalid, so bail.
         if (GroupDepth == 0)
-          return false;
+          return nullptr;
 
         GroupDepth -= 1;
         break;
@@ -2109,9 +2163,32 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
       }
     }
   }
+  assert(Ptr > TokStart && Ptr <= BufferEnd);
+  return Ptr;
+}
+
+bool Lexer::tryLexRegexLiteral(const char *TokStart) {
+  bool IsForwardSlash = (*TokStart == '/');
+  bool MustBeRegex = true;
+
+  if (IsForwardSlash) {
+    switch (ForwardSlashRegexMode) {
+    case LexerForwardSlashRegexMode::None:
+      return false;
+    case LexerForwardSlashRegexMode::Tentative:
+      MustBeRegex = false;
+      break;
+    case LexerForwardSlashRegexMode::Always:
+      break;
+    }
+  }
+  bool CompletelyErroneous = false;
+  auto *Ptr = tryScanRegexLiteral(TokStart, MustBeRegex, getTokenDiags(),
+                                  CompletelyErroneous);
+  if (!Ptr)
+    return false;
 
   // Update to point to where we ended regex lexing.
-  assert(Ptr > TokStart && Ptr <= BufferEnd);
   CurPtr = Ptr;
 
   // If the lexing was completely erroneous, form an unknown token.