|
33 | 33 |
|
34 | 34 | #include <limits> |
35 | 35 |
|
| 36 | +// Regex lexing delivered via libSwift. |
| 37 | +#include "swift/Parse/ExperimentalRegexBridging.h" |
| 38 | +static RegexLiteralLexingFn regexLiteralLexingFn = nullptr; |
| 39 | +void Parser_registerRegexLiteralLexingFn(RegexLiteralLexingFn fn) { |
| 40 | + regexLiteralLexingFn = fn; |
| 41 | +} |
| 42 | + |
36 | 43 | using namespace swift; |
37 | 44 | using namespace swift::syntax; |
38 | 45 |
|
@@ -1951,36 +1958,46 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body, |
1951 | 1958 | } |
1952 | 1959 | } |
1953 | 1960 |
|
1954 | | -void Lexer::lexRegexLiteral(const char *TokStart) { |
| 1961 | +bool Lexer::tryLexRegexLiteral(const char *TokStart) { |
1955 | 1962 | assert(*TokStart == '\''); |
1956 | 1963 |
|
1957 | | - bool HadError = false; |
1958 | | - while (true) { |
1959 | | - // Check if we reached the end of the literal without terminating. |
1960 | | - if (CurPtr >= BufferEnd || *CurPtr == '\n' || *CurPtr == '\r') { |
1961 | | - diagnose(TokStart, diag::lex_unterminated_regex); |
1962 | | - return formToken(tok::unknown, TokStart); |
1963 | | - } |
| 1964 | + // We need to have experimental string processing enabled, and have the |
| 1965 | + // parsing logic for regex literals available. |
| 1966 | + if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn) |
| 1967 | + return false; |
1964 | 1968 |
|
1965 | | - const auto *CharStart = CurPtr; |
1966 | | - uint32_t CharValue = validateUTF8CharacterAndAdvance(CurPtr, BufferEnd); |
1967 | | - if (CharValue == ~0U) { |
1968 | | - diagnose(CharStart, diag::lex_invalid_utf8); |
1969 | | - HadError = true; |
1970 | | - continue; |
1971 | | - } |
1972 | | - if (CharValue == '\\' && (*CurPtr == '\'' || *CurPtr == '\\')) { |
1973 | | - // Skip escaped delimiter or \. |
1974 | | - CurPtr++; |
1975 | | - } else if (CharValue == '\'') { |
1976 | | - // End of literal, stop. |
1977 | | - break; |
1978 | | - } |
| 1969 | + // Ask libswift to try to lex a regex literal. |
| 1970 | + // - Ptr will not be advanced if this is not for a regex literal. |
| 1971 | + // - ErrStr will be set if there is any error to emit. |
| 1972 | + // - CompletelyErroneous will be set if there was an error that cannot be |
| 1973 | + // recovered from. |
| 1974 | + auto *Ptr = TokStart; |
| 1975 | + const char *ErrStr = nullptr; |
| 1976 | + bool CompletelyErroneous = regexLiteralLexingFn(&Ptr, BufferEnd, &ErrStr); |
| 1977 | + if (ErrStr) |
| 1978 | + diagnose(TokStart, diag::regex_literal_parsing_error, ErrStr); |
| 1979 | + |
| 1980 | + // If we didn't make any lexing progress, this isn't a regex literal and we |
| 1981 | + // should fall back to lexing as something else. |
| 1982 | + if (Ptr == TokStart) |
| 1983 | + return false; |
| 1984 | + |
| 1985 | + // Update to point to where we ended regex lexing. |
| 1986 | + assert(Ptr > TokStart && Ptr <= BufferEnd); |
| 1987 | + CurPtr = Ptr; |
| 1988 | + |
| 1989 | + // If the lexing was completely erroneous, form an unknown token. |
| 1990 | + if (CompletelyErroneous) { |
| 1991 | + assert(ErrStr); |
| 1992 | + formToken(tok::unknown, TokStart); |
| 1993 | + return true; |
1979 | 1994 | } |
1980 | | - if (HadError) |
1981 | | - return formToken(tok::unknown, TokStart); |
1982 | 1995 |
|
| 1996 | + // Otherwise, we either had a successful lex, or something that was |
| 1997 | + // recoverable. |
| 1998 | + assert(ErrStr || CurPtr[-1] == '\''); |
1983 | 1999 | formToken(tok::regex_literal, TokStart); |
| 2000 | + return true; |
1984 | 2001 | } |
1985 | 2002 |
|
1986 | 2003 | /// lexEscapedIdentifier: |
@@ -2528,11 +2545,11 @@ void Lexer::lexImpl() { |
2528 | 2545 |
|
2529 | 2546 | case '\'': |
2530 | 2547 | // If we have experimental string processing enabled, and have the parsing |
2531 | | - // logic for regex literals, lex a single quoted string as a regex literal. |
2532 | | - if (LangOpts.EnableExperimentalStringProcessing && |
2533 | | - Parser_hasParseRegexStrawperson()) { |
2534 | | - return lexRegexLiteral(TokStart); |
2535 | | - } |
| 2548 | + // logic for regex literals, try to lex a single quoted string as a regex |
| 2549 | + // literal. |
| 2550 | + if (tryLexRegexLiteral(TokStart)) |
| 2551 | + return; |
| 2552 | + |
2536 | 2553 | // Otherwise lex as a string literal and emit a diagnostic. |
2537 | 2554 | LLVM_FALLTHROUGH; |
2538 | 2555 | case '"': |
|
0 commit comments