|
33 | 33 |
|
34 | 34 | #include <limits> |
35 | 35 |
|
36 | | -// Regex lexing delivered via libSwift. |
37 | | -#include "swift/Parse/ExperimentalRegexBridging.h" |
38 | | -static RegexLiteralLexingFn regexLiteralLexingFn = nullptr; |
39 | | -void Parser_registerRegexLiteralLexingFn(RegexLiteralLexingFn fn) { |
40 | | - regexLiteralLexingFn = fn; |
41 | | -} |
42 | | - |
43 | 36 | using namespace swift; |
44 | 37 | using namespace swift::syntax; |
45 | 38 |
|
@@ -1958,46 +1951,36 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body, |
1958 | 1951 | } |
1959 | 1952 | } |
1960 | 1953 |
|
1961 | | -bool Lexer::tryLexRegexLiteral(const char *TokStart) { |
| 1954 | +void Lexer::lexRegexLiteral(const char *TokStart) { |
1962 | 1955 | assert(*TokStart == '\''); |
1963 | 1956 |
|
1964 | | - // We need to have experimental string processing enabled, and have the |
1965 | | - // parsing logic for regex literals available. |
1966 | | - if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn) |
1967 | | - return false; |
1968 | | - |
1969 | | - // Ask libswift to try and lex a regex literal. |
1970 | | - // - Ptr will not be advanced if this is not for a regex literal. |
1971 | | - // - ErrStr will be set if there is any error to emit. |
1972 | | - // - CompletelyErroneous will be set if there was an error that cannot be |
1973 | | - // recovered from. |
1974 | | - auto *Ptr = TokStart; |
1975 | | - const char *ErrStr = nullptr; |
1976 | | - bool CompletelyErroneous = regexLiteralLexingFn(&Ptr, BufferEnd, &ErrStr); |
1977 | | - if (ErrStr) |
1978 | | - diagnose(TokStart, diag::regex_literal_parsing_error, ErrStr); |
1979 | | - |
1980 | | - // If we didn't make any lexing progress, this isn't a regex literal and we |
1981 | | - // should fallback to lexing as something else. |
1982 | | - if (Ptr == TokStart) |
1983 | | - return false; |
1984 | | - |
1985 | | - // Update to point to where we ended regex lexing. |
1986 | | - assert(Ptr > TokStart && Ptr <= BufferEnd); |
1987 | | - CurPtr = Ptr; |
| 1957 | + bool HadError = false; |
| 1958 | + while (true) { |
| 1959 | + // Check if we reached the end of the buffer or the end of the line before the literal was terminated. |
| 1960 | + if (CurPtr >= BufferEnd || *CurPtr == '\n' || *CurPtr == '\r') { |
| 1961 | + diagnose(TokStart, diag::lex_unterminated_regex); |
| 1962 | + return formToken(tok::unknown, TokStart); |
| 1963 | + } |
1988 | 1964 |
|
1989 | | - // If the lexing was completely erroneous, form an unknown token. |
1990 | | - if (CompletelyErroneous) { |
1991 | | - assert(ErrStr); |
1992 | | - formToken(tok::unknown, TokStart); |
1993 | | - return true; |
| 1965 | + const auto *CharStart = CurPtr; |
| 1966 | + uint32_t CharValue = validateUTF8CharacterAndAdvance(CurPtr, BufferEnd); |
| 1967 | + if (CharValue == ~0U) { |
| 1968 | + diagnose(CharStart, diag::lex_invalid_utf8); |
| 1969 | + HadError = true; |
| 1970 | + continue; |
| 1971 | + } |
| 1972 | + if (CharValue == '\\' && (*CurPtr == '\'' || *CurPtr == '\\')) { |
| 1973 | + // Skip an escaped delimiter (\') or an escaped backslash (\\) so that it cannot end the literal. |
| 1974 | + CurPtr++; |
| 1975 | + } else if (CharValue == '\'') { |
| 1976 | + // End of literal, stop. |
| 1977 | + break; |
| 1978 | + } |
1994 | 1979 | } |
| 1980 | + if (HadError) |
| 1981 | + return formToken(tok::unknown, TokStart); |
1995 | 1982 |
|
1996 | | - // Otherwise, we either had a successful lex, or something that was |
1997 | | - // recoverable. |
1998 | | - assert(ErrStr || CurPtr[-1] == '\''); |
1999 | 1983 | formToken(tok::regex_literal, TokStart); |
2000 | | - return true; |
2001 | 1984 | } |
2002 | 1985 |
|
2003 | 1986 | /// lexEscapedIdentifier: |
@@ -2545,11 +2528,11 @@ void Lexer::lexImpl() { |
2545 | 2528 |
|
2546 | 2529 | case '\'': |
2547 | 2530 | // If we have experimental string processing enabled, and have the parsing |
2548 | | - // logic for regex literals, try to lex a single quoted string as a regex |
2549 | | - // literal. |
2550 | | - if (tryLexRegexLiteral(TokStart)) |
2551 | | - return; |
2552 | | - |
| 2531 | + // logic for regex literals, lex a single-quoted string as a regex literal. |
| 2532 | + if (LangOpts.EnableExperimentalStringProcessing && |
| 2533 | + Parser_hasParseRegexStrawperson()) { |
| 2534 | + return lexRegexLiteral(TokStart); |
| 2535 | + } |
2553 | 2536 | // Otherwise lex as a string literal and emit a diagnostic. |
2554 | 2537 | LLVM_FALLTHROUGH; |
2555 | 2538 | case '"': |
|
0 commit comments