@@ -246,9 +246,11 @@ Lexer::Lexer(const LangOptions &Options, const SourceManager &SourceMgr,
246246 initialize (Offset, EndOffset);
247247}
248248
249- Lexer::Lexer (Lexer &Parent, State BeginState, State EndState)
249+ Lexer::Lexer (const Lexer &Parent, State BeginState, State EndState,
250+ bool EnableDiagnostics)
250251 : Lexer(PrincipalTag(), Parent.LangOpts, Parent.SourceMgr, Parent.BufferID,
251- Parent.getUnderlyingDiags(), Parent.LexMode,
252+ EnableDiagnostics ? Parent.getUnderlyingDiags() : nullptr,
253+ Parent.LexMode,
252254 Parent.IsHashbangAllowed
253255 ? HashbangMode::Allowed
254256 : HashbangMode::Disallowed,
@@ -1978,27 +1980,76 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
19781980 }
19791981}
19801982
1981- bool Lexer::tryLexRegexLiteral (const char *TokStart) {
// Decides whether the operator token `Tok` may begin a bare-slash (`/.../`)
// regex literal in a way that makes it unsafe to skip over.
//
// Returns false when the BareSlashRegexLiterals feature is not enabled, when
// `Tok` is not a prefix or binary operator, when the slash search on the
// token's text finds nothing, or when neither scan below yields a literal.
// Returns true when the tentative scan succeeds, or when the forced scan
// succeeds and the tokens it spans contain an unbalanced `{` or `}`.
// No diagnostics engine is passed to either scan, and the child lexer is
// created with diagnostics disabled.
1983+ bool Lexer::isPotentialUnskippableBareSlashRegexLiteral (const Token &Tok) const {
1984+ if (!LangOpts.hasFeature (Feature::BareSlashRegexLiterals))
1985+ return false ;
1986+
1987+ // A `/.../` regex literal may only start on a binary or prefix operator.
1988+ if (Tok.isNot (tok::oper_prefix, tok::oper_binary_spaced,
1989+ tok::oper_binary_unspaced)) {
1990+ return false ;
1991+ }
1992+ auto SlashIdx = Tok.getText ().find (" /" );
1993+ if (SlashIdx == StringRef::npos)
1994+ return false ;
1995+
// Buffer pointer for the token's location, advanced by the index of the match
// within the token text; this is where scanning starts.
1996+ auto Offset = getBufferPtrForSourceLoc (Tok.getLoc ()) + SlashIdx;
1997+ bool CompletelyErroneous;
// First attempt: a tentative scan (MustBeRegex = false), which returns null
// rather than forcing a regex interpretation.
1998+ if (tryScanRegexLiteral (Offset, /* MustBeRegex*/ false , /* Diags*/ nullptr ,
1999+ CompletelyErroneous)) {
2000+ // Definitely a regex literal.
2001+ return true ;
2002+ }
2003+
2004+ // A prefix '/' can never be a regex literal if it failed a heuristic.
2005+ if (Tok.is (tok::oper_prefix))
2006+ return false ;
2007+
2008+ // We either don't have a regex literal, or we failed a heuristic. We now need
2009+ // to make sure we don't have an unbalanced `{` or `}`, as that would have the
2010+ // potential to change the range of a skipped body if we try to more
2011+ // aggressively lex a regex literal during normal parsing. If we have balanced
2012+ // `{` + `}`, we can proceed with skipping. Worst case scenario is we emit a
2013+ // worse diagnostic.
2014+ // FIXME: We ought to silence lexer diagnostics when skipping, this would
2015+ // avoid emitting a worse diagnostic.
// Second attempt: force a regex scan (MustBeRegex = true) to learn where the
// literal would end if it were lexed as one.
2016+ auto *EndPtr = tryScanRegexLiteral (Offset, /* MustBeRegex*/ true ,
2017+ /* Diags*/ nullptr , CompletelyErroneous);
2018+ if (!EndPtr)
2019+ return false ;
2020+
// Child lexer over the range from just past the operator token up to EndPtr,
// constructed with diagnostics disabled.
2021+ Lexer L (*this , State (Tok.getLoc ().getAdvancedLoc (Tok.getLength ())),
2022+ State (getSourceLoc (EndPtr)), /* EnableDiagnostics*/ false );
2023+
// Count of `{` tokens seen so far that have not been closed by a `}`.
2024+ unsigned OpenBraces = 0 ;
2025+ while (L.peekNextToken ().isNot (tok::eof)) {
// NOTE(review): this local `Tok` shadows the function's `Tok` parameter.
2026+ Token Tok;
2027+ L.lex (Tok);
2028+ if (Tok.is (tok::l_brace))
2029+ OpenBraces += 1 ;
2030+ if (Tok.is (tok::r_brace)) {
// A `}` with no open `{` in this range is unbalanced: report unskippable.
2031+ if (OpenBraces == 0 )
2032+ return true ;
2033+ OpenBraces -= 1 ;
2034+ }
2035+ }
2036+
2037+ // If we have an unbalanced `{`, this is unskippable.
2038+ return OpenBraces != 0 ;
2039+ }
2040+
2041+ const char *Lexer::tryScanRegexLiteral (const char *TokStart, bool MustBeRegex,
2042+ DiagnosticEngine *Diags,
2043+ bool &CompletelyErroneous) const {
19822044 // We need to have experimental string processing enabled, and have the
19832045 // parsing logic for regex literals available.
19842046 if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
1985- return false ;
2047+ return nullptr ;
19862048
1987- bool MustBeRegex = true ;
19882049 bool IsForwardSlash = (*TokStart == ' /' );
19892050
19902051 // Check if we're able to lex a `/.../` regex.
19912052 if (IsForwardSlash) {
1992- switch (ForwardSlashRegexMode) {
1993- case LexerForwardSlashRegexMode::None:
1994- return false ;
1995- case LexerForwardSlashRegexMode::Tentative:
1996- MustBeRegex = false ;
1997- break ;
1998- case LexerForwardSlashRegexMode::Always:
1999- break ;
2000- }
2001-
20022053 // For `/.../` regex literals, we need to ban space and tab at the start of
20032054 // a regex to avoid ambiguity with operator chains, e.g:
20042055 //
@@ -2016,23 +2067,25 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20162067 case ' ' :
20172068 case ' \t ' : {
20182069 if (!MustBeRegex)
2019- return false ;
2070+ return nullptr ;
20202071
2021- // We must have a regex, so emit an error for space and tab.
2022- StringRef DiagChar;
2023- switch (*RegexContentStart) {
2024- case ' ' :
2025- DiagChar = " space" ;
2026- break ;
2027- case ' \t ' :
2028- DiagChar = " tab" ;
2029- break ;
2030- default :
2031- llvm_unreachable (" Unhandled case" );
2072+ if (Diags) {
2073+ // We must have a regex, so emit an error for space and tab.
2074+ StringRef DiagChar;
2075+ switch (*RegexContentStart) {
2076+ case ' ' :
2077+ DiagChar = " space" ;
2078+ break ;
2079+ case ' \t ' :
2080+ DiagChar = " tab" ;
2081+ break ;
2082+ default :
2083+ llvm_unreachable (" Unhandled case" );
2084+ }
2085+ Diags->diagnose (getSourceLoc (RegexContentStart),
2086+ diag::lex_regex_literal_invalid_starting_char, DiagChar)
2087+ .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
20322088 }
2033- diagnose (RegexContentStart, diag::lex_regex_literal_invalid_starting_char,
2034- DiagChar)
2035- .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
20362089 break ;
20372090 }
20382091 default :
@@ -2045,25 +2098,26 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20452098 // - CompletelyErroneous will be set if there was an error that cannot be
20462099 // recovered from.
20472100 auto *Ptr = TokStart;
2048- bool CompletelyErroneous = regexLiteralLexingFn (
2049- &Ptr, BufferEnd, MustBeRegex,
2050- getBridgedOptionalDiagnosticEngine (getTokenDiags ()));
2101+ CompletelyErroneous = regexLiteralLexingFn (
2102+ &Ptr, BufferEnd, MustBeRegex, getBridgedOptionalDiagnosticEngine (Diags));
20512103
20522104 // If we didn't make any lexing progress, this isn't a regex literal and we
20532105 // should fall back to lexing as something else.
20542106 if (Ptr == TokStart)
2055- return false ;
2107+ return nullptr ;
20562108
20572109 // If we're lexing `/.../`, error if we ended on the opening of a comment.
20582110 // We prefer to lex the comment as it's more likely than not that is what
20592111 // the user is expecting.
20602112 // TODO: This should be sunk into the Swift library.
20612113 if (IsForwardSlash && Ptr[-1 ] == ' /' && (*Ptr == ' *' || *Ptr == ' /' )) {
20622114 if (!MustBeRegex)
2063- return false ;
2064-
2065- diagnose (TokStart, diag::lex_regex_literal_unterminated);
2115+ return nullptr ;
20662116
2117+ if (Diags) {
2118+ Diags->diagnose (getSourceLoc (TokStart),
2119+ diag::lex_regex_literal_unterminated);
2120+ }
20672121 // Move the pointer back to the '/' of the comment.
20682122 Ptr--;
20692123 }
@@ -2096,7 +2150,7 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20962150
20972151 // Invalid, so bail.
20982152 if (GroupDepth == 0 )
2099- return false ;
2153+ return nullptr ;
21002154
21012155 GroupDepth -= 1 ;
21022156 break ;
@@ -2109,9 +2163,32 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
21092163 }
21102164 }
21112165 }
2166+ assert (Ptr > TokStart && Ptr <= BufferEnd);
2167+ return Ptr;
2168+ }
2169+
2170+ bool Lexer::tryLexRegexLiteral (const char *TokStart) {
2171+ bool IsForwardSlash = (*TokStart == ' /' );
2172+ bool MustBeRegex = true ;
2173+
2174+ if (IsForwardSlash) {
2175+ switch (ForwardSlashRegexMode) {
2176+ case LexerForwardSlashRegexMode::None:
2177+ return false ;
2178+ case LexerForwardSlashRegexMode::Tentative:
2179+ MustBeRegex = false ;
2180+ break ;
2181+ case LexerForwardSlashRegexMode::Always:
2182+ break ;
2183+ }
2184+ }
2185+ bool CompletelyErroneous = false ;
2186+ auto *Ptr = tryScanRegexLiteral (TokStart, MustBeRegex, getTokenDiags (),
2187+ CompletelyErroneous);
2188+ if (!Ptr)
2189+ return false ;
21122190
21132191 // Update to point to where we ended regex lexing.
2114- assert (Ptr > TokStart && Ptr <= BufferEnd);
21152192 CurPtr = Ptr;
21162193
21172194 // If the lexing was completely erroneous, form an unknown token.
0 commit comments