@@ -1980,27 +1980,18 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
19801980 }
19811981}
19821982
1983- bool Lexer::tryLexRegexLiteral (const char *TokStart) {
1983+ const char *Lexer::tryScanRegexLiteral (const char *TokStart, bool MustBeRegex,
1984+ DiagnosticEngine *Diags,
1985+ bool &CompletelyErroneous) const {
19841986 // We need to have experimental string processing enabled, and have the
19851987 // parsing logic for regex literals available.
19861988 if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
1987- return false ;
1989+ return nullptr ;
19881990
1989- bool MustBeRegex = true ;
19901991 bool IsForwardSlash = (*TokStart == ' /' );
19911992
19921993 // Check if we're able to lex a `/.../` regex.
19931994 if (IsForwardSlash) {
1994- switch (ForwardSlashRegexMode) {
1995- case LexerForwardSlashRegexMode::None:
1996- return false ;
1997- case LexerForwardSlashRegexMode::Tentative:
1998- MustBeRegex = false ;
1999- break ;
2000- case LexerForwardSlashRegexMode::Always:
2001- break ;
2002- }
2003-
20041995 // For `/.../` regex literals, we need to ban space and tab at the start of
20051996 // a regex to avoid ambiguity with operator chains, e.g:
20061997 //
@@ -2018,23 +2009,25 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20182009 case ' ' :
20192010 case ' \t ' : {
20202011 if (!MustBeRegex)
2021- return false ;
2012+ return nullptr ;
20222013
2023- // We must have a regex, so emit an error for space and tab.
2024- StringRef DiagChar;
2025- switch (*RegexContentStart) {
2026- case ' ' :
2027- DiagChar = " space" ;
2028- break ;
2029- case ' \t ' :
2030- DiagChar = " tab" ;
2031- break ;
2032- default :
2033- llvm_unreachable (" Unhandled case" );
2014+ if (Diags) {
2015+ // We must have a regex, so emit an error for space and tab.
2016+ StringRef DiagChar;
2017+ switch (*RegexContentStart) {
2018+ case ' ' :
2019+ DiagChar = " space" ;
2020+ break ;
2021+ case ' \t ' :
2022+ DiagChar = " tab" ;
2023+ break ;
2024+ default :
2025+ llvm_unreachable (" Unhandled case" );
2026+ }
2027+ Diags->diagnose (getSourceLoc (RegexContentStart),
2028+ diag::lex_regex_literal_invalid_starting_char, DiagChar)
2029+ .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
20342030 }
2035- diagnose (RegexContentStart, diag::lex_regex_literal_invalid_starting_char,
2036- DiagChar)
2037- .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
20382031 break ;
20392032 }
20402033 default :
@@ -2047,25 +2040,26 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20472040 // - CompletelyErroneous will be set if there was an error that cannot be
20482041 // recovered from.
20492042 auto *Ptr = TokStart;
2050- bool CompletelyErroneous = regexLiteralLexingFn (
2051- &Ptr, BufferEnd, MustBeRegex,
2052- getBridgedOptionalDiagnosticEngine (getTokenDiags ()));
2043+ CompletelyErroneous = regexLiteralLexingFn (
2044+ &Ptr, BufferEnd, MustBeRegex, getBridgedOptionalDiagnosticEngine (Diags));
20532045
20542046 // If we didn't make any lexing progress, this isn't a regex literal and we
20552047 // should fallback to lexing as something else.
20562048 if (Ptr == TokStart)
2057- return false ;
2049+ return nullptr ;
20582050
20592051 // If we're lexing `/.../`, error if we ended on the opening of a comment.
20602052 // We prefer to lex the comment as it's more likely than not that is what
20612053 // the user is expecting.
20622054 // TODO: This should be sunk into the Swift library.
20632055 if (IsForwardSlash && Ptr[-1 ] == ' /' && (*Ptr == ' *' || *Ptr == ' /' )) {
20642056 if (!MustBeRegex)
2065- return false ;
2066-
2067- diagnose (TokStart, diag::lex_regex_literal_unterminated);
2057+ return nullptr ;
20682058
2059+ if (Diags) {
2060+ Diags->diagnose (getSourceLoc (TokStart),
2061+ diag::lex_regex_literal_unterminated);
2062+ }
20692063 // Move the pointer back to the '/' of the comment.
20702064 Ptr--;
20712065 }
@@ -2098,7 +2092,7 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20982092
20992093 // Invalid, so bail.
21002094 if (GroupDepth == 0 )
2101- return false ;
2095+ return nullptr ;
21022096
21032097 GroupDepth -= 1 ;
21042098 break ;
@@ -2111,9 +2105,32 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
21112105 }
21122106 }
21132107 }
2108+ assert (Ptr > TokStart && Ptr <= BufferEnd);
2109+ return Ptr;
2110+ }
2111+
2112+ bool Lexer::tryLexRegexLiteral (const char *TokStart) {
2113+ bool IsForwardSlash = (*TokStart == ' /' );
2114+ bool MustBeRegex = true ;
2115+
2116+ if (IsForwardSlash) {
2117+ switch (ForwardSlashRegexMode) {
2118+ case LexerForwardSlashRegexMode::None:
2119+ return false ;
2120+ case LexerForwardSlashRegexMode::Tentative:
2121+ MustBeRegex = false ;
2122+ break ;
2123+ case LexerForwardSlashRegexMode::Always:
2124+ break ;
2125+ }
2126+ }
2127+ bool CompletelyErroneous = false ;
2128+ auto *Ptr = tryScanRegexLiteral (TokStart, MustBeRegex, getTokenDiags (),
2129+ CompletelyErroneous);
2130+ if (!Ptr)
2131+ return false ;
21142132
21152133 // Update to point to where we ended regex lexing.
2116- assert (Ptr > TokStart && Ptr <= BufferEnd);
21172134 CurPtr = Ptr;
21182135
21192136 // If the lexing was completely erroneous, form an unknown token.
0 commit comments