@@ -1969,22 +1969,109 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
19691969 if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
19701970 return false ;
19711971
1972- // Ask libswift to try and lex a regex literal.
1972+ bool MustBeRegex = true ;
1973+ bool IsForwardSlash = (*TokStart == ' /' );
1974+
1975+ // Check if we're able to lex a `/.../` regex.
1976+ if (IsForwardSlash) {
1977+ switch (ForwardSlashRegexMode) {
1978+ case LexerForwardSlashRegexMode::None:
1979+ return false ;
1980+ case LexerForwardSlashRegexMode::Tentative:
1981+ MustBeRegex = false ;
1982+ break ;
1983+ case LexerForwardSlashRegexMode::Always:
1984+ break ;
1985+ }
1986+
1987+ // For `/.../` regex literals, we need to ban space and tab at the start of
1988+ // a regex to avoid ambiguity with operator chains, e.g:
1989+ //
1990+ // Builder {
1991+ // 0
1992+ // / 1 /
1993+ // 2
1994+ // }
1995+ //
1996+ // This takes advantage of the consistent operator spacing rule. We also
1997+ // need to ban ')' to avoid ambiguity with unapplied operator references e.g
1998+ // `reduce(1, /)`. This would be invalid regex syntax anyways. Note this
1999+ // doesn't totally save us from e.g `foo(/, 0)`, but it should at least
2000+ // help, and it ensures users can always surround their operator ref in
2001+ // parens `(/)` to fix the issue.
2002+ // TODO: This heuristic should be sunk into the Swift library once we have a
2003+ // way of doing fix-its from there.
2004+ auto *RegexContentStart = TokStart + 1 ;
2005+ switch (*RegexContentStart) {
2006+ case ' )' : {
2007+ if (!MustBeRegex)
2008+ return false ;
2009+
2010+ // ')' is invalid anyway, so we can let the parser diagnose it.
2011+ break ;
2012+ }
2013+ case ' ' :
2014+ case ' \t ' : {
2015+ if (!MustBeRegex)
2016+ return false ;
2017+
2018+ // We must have a regex, so emit an error for space and tab.
2019+ StringRef DiagChar;
2020+ switch (*RegexContentStart) {
2021+ case ' ' :
2022+ DiagChar = " space" ;
2023+ break ;
2024+ case ' \t ' :
2025+ DiagChar = " tab" ;
2026+ break ;
2027+ default :
2028+ llvm_unreachable (" Unhandled case" );
2029+ }
2030+ diagnose (RegexContentStart, diag::lex_regex_literal_invalid_starting_char,
2031+ DiagChar)
2032+ .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
2033+ break ;
2034+ }
2035+ default :
2036+ break ;
2037+ }
2038+ }
2039+
2040+ // Ask the Swift library to try and lex a regex literal.
19732041 // - Ptr will not be advanced if this is not for a regex literal.
19742042 // - ErrStr will be set if there is any error to emit.
19752043 // - CompletelyErroneous will be set if there was an error that cannot be
19762044 // recovered from.
19772045 auto *Ptr = TokStart;
19782046 const char *ErrStr = nullptr ;
19792047 bool CompletelyErroneous = regexLiteralLexingFn (&Ptr, BufferEnd, &ErrStr);
1980- if (ErrStr)
1981- diagnose (TokStart, diag::regex_literal_parsing_error, ErrStr);
19822048
19832049 // If we didn't make any lexing progress, this isn't a regex literal and we
19842050 // should fallback to lexing as something else.
19852051 if (Ptr == TokStart)
19862052 return false ;
19872053
2054+ if (ErrStr) {
2055+ if (!MustBeRegex)
2056+ return false ;
2057+
2058+ diagnose (TokStart, diag::regex_literal_parsing_error, ErrStr);
2059+ }
2060+
2061+ // If we're lexing `/.../`, error if we ended on the opening of a comment.
2062+ // We prefer to lex the comment as it's more likely than not that is what
2063+ // the user is expecting.
2064+ // TODO: This should be sunk into the Swift library.
2065+ if (IsForwardSlash && Ptr[-1 ] == ' /' && (*Ptr == ' *' || *Ptr == ' /' )) {
2066+ if (!MustBeRegex)
2067+ return false ;
2068+
2069+ diagnose (TokStart, diag::lex_regex_literal_unterminated);
2070+
2071+ // Move the pointer back to the '/' of the comment.
2072+ Ptr--;
2073+ }
2074+
19882075 // Update to point to where we ended regex lexing.
19892076 assert (Ptr > TokStart && Ptr <= BufferEnd);
19902077 CurPtr = Ptr;
@@ -1996,12 +2083,23 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
19962083 return true ;
19972084 }
19982085
1999- // Otherwise, we either had a successful lex, or something that was
2000- // recoverable.
2086+ // We either had a successful lex, or something that was recoverable.
20012087 formToken (tok::regex_literal, TokStart);
20022088 return true ;
20032089}
20042090
/// Restores the lexer to state \p S with `/.../` regex literal lexing enabled.
///
/// Does nothing unless `LangOpts.EnableBareSlashRegexLiterals` is set.
///
/// \param S The lexer state passed to `restoreState`.
/// \param mustBeRegex If true, `ForwardSlashRegexMode` is set to `Always` for
///   the duration of this call; otherwise it is set to `Tentative`. In
///   `tryLexRegexLiteral`, `Tentative` makes a problematic `/.../` lex return
///   false (so the caller falls back to lexing something else) instead of
///   emitting a diagnostic.
2091+ void Lexer::tryLexForwardSlashRegexLiteralFrom (State S, bool mustBeRegex) {
2092+ if (!LangOpts.EnableBareSlashRegexLiterals )
2093+ return ;
2094+
2095+ // Try re-lex with forward slash enabled. The previous mode is put back when RegexLexingScope is destroyed at the end of this function.
2096+ llvm::SaveAndRestore<LexerForwardSlashRegexMode> RegexLexingScope (
2097+ ForwardSlashRegexMode, mustBeRegex
2098+ ? LexerForwardSlashRegexMode::Always
2099+ : LexerForwardSlashRegexMode::Tentative);
2100+ restoreState (S, /* enableDiagnostics*/ true ); // NOTE(review): presumably this is what re-lexes the token at S under the new mode -- confirm against restoreState.
2101+ }
2102+
20052103// / lexEscapedIdentifier:
20062104// / identifier ::= '`' identifier '`'
20072105// /
@@ -2483,8 +2581,7 @@ void Lexer::lexImpl() {
24832581 if (unsigned CustomDelimiterLen = advanceIfCustomDelimiter (CurPtr, Diags))
24842582 return lexStringLiteral (CustomDelimiterLen);
24852583
2486- // If we have experimental string processing enabled, try lex a regex
2487- // literal.
2584+ // Try lex a regex literal.
24882585 if (tryLexRegexLiteral (TokStart))
24892586 return ;
24902587
@@ -2505,6 +2602,10 @@ void Lexer::lexImpl() {
25052602 " Non token comment should be eaten by lexTrivia as LeadingTrivia" );
25062603 return formToken (tok::comment, TokStart);
25072604 }
2605+ // Try lex a regex literal.
2606+ if (tryLexRegexLiteral (TokStart))
2607+ return ;
2608+
25082609 return lexOperatorIdentifier ();
25092610 case ' %' :
25102611 // Lex %[0-9a-zA-Z_]+ as a local SIL value
0 commit comments