@@ -107,28 +107,46 @@ func parseTest(
107107 serializedCaptures. deallocate ( )
108108}
109109
110+ /// Test delimiter lexing. Takes an input string that starts with a regex
111+ /// literal. If `ignoreTrailing` is true, there may be additional characters
112+ /// that follow the literal that are not considered part of it.
113+ @discardableResult
110114func delimiterLexingTest(
111- _ input: String , file: StaticString = #file, line: UInt = #line
112- ) {
115+ _ input: String , ignoreTrailing: Bool = false ,
116+ file: StaticString = #file, line: UInt = #line
117+ ) -> String {
113118 input. withCString ( encodedAs: UTF8 . self) { ptr in
114119 let endPtr = ptr + input. utf8. count
115120 let ( contents, delim, end) = try ! lexRegex ( start: ptr, end: endPtr)
116- XCTAssertEqual ( end, endPtr, file: file, line: line)
121+ if ignoreTrailing {
122+ XCTAssertNotEqual ( end, endPtr, file: file, line: line)
123+ } else {
124+ XCTAssertEqual ( end, endPtr, file: file, line: line)
125+ }
117126
118- let ( parseContents, parseDelim) = droppingRegexDelimiters ( input)
127+ let rawPtr = UnsafeRawPointer ( ptr)
128+ let buffer = UnsafeRawBufferPointer ( start: rawPtr, count: end - rawPtr)
129+ let literal = String ( decoding: buffer, as: UTF8 . self)
130+
131+ let ( parseContents, parseDelim) = droppingRegexDelimiters ( literal)
119132 XCTAssertEqual ( contents, parseContents, file: file, line: line)
120133 XCTAssertEqual ( delim, parseDelim, file: file, line: line)
134+ return literal
121135 }
122136}
123137
138+ /// Test parsing an input string with regex delimiters. If `ignoreTrailing` is
139+ /// true, there may be additional characters that follow the literal that are
140+ /// not considered part of it.
124141func parseWithDelimitersTest(
125- _ input: String , _ expecting: AST . Node ,
142+ _ input: String , _ expecting: AST . Node , ignoreTrailing : Bool = false ,
126143 file: StaticString = #file, line: UInt = #line
127144) {
128145 // First try lexing.
129- delimiterLexingTest ( input, file: file, line: line)
146+ let literal = delimiterLexingTest (
147+ input, ignoreTrailing: ignoreTrailing, file: file, line: line)
130148
131- let orig = try ! parseWithDelimiters ( input )
149+ let orig = try ! parseWithDelimiters ( literal )
132150 let ast = orig. root
133151 guard ast == expecting
134152 || ast. _dump ( ) == expecting. _dump ( ) // EQ workaround
@@ -1509,6 +1527,63 @@ extension RegexTests {
15091527
15101528 // Printable ASCII characters.
15111529 delimiterLexingTest ( ##"re' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'"## )
1530+
1531+ // MARK: Delimiter skipping: Make sure we can skip over the ending delimiter
1532+ // if it's clear that it's part of the regex syntax.
1533+
1534+ parseWithDelimitersTest (
1535+ #"re'(?'a_bcA0'\')'"# , namedCapture ( " a_bcA0 " , " ' " ) )
1536+ parseWithDelimitersTest (
1537+ #"re'(?'a_bcA0-c1A'x*)'"# ,
1538+ balancedCapture ( name: " a_bcA0 " , priorName: " c1A " , zeroOrMore ( of: " x " ) ) )
1539+
1540+ parseWithDelimitersTest (
1541+ #"re'(?('a_bcA0')x|y)'"# , conditional (
1542+ . groupMatched( ref ( " a_bcA0 " ) ) , trueBranch: " x " , falseBranch: " y " ) )
1543+ parseWithDelimitersTest (
1544+ #"re'(?('+20')\')'"# , conditional (
1545+ . groupMatched( ref ( plus: 20 ) ) , trueBranch: " ' " , falseBranch: empty ( ) ) )
1546+
1547+ parseWithDelimitersTest (
1548+ #"re'a\k'b0A''"# , concat ( " a " , backreference ( . named( " b0A " ) ) ) )
1549+ parseWithDelimitersTest (
1550+ #"re'\k'+2-1''"# , backreference ( . relative( 2 ) , recursionLevel: - 1 ) )
1551+
1552+ parseWithDelimitersTest (
1553+ #"re'a\g'b0A''"# , concat ( " a " , subpattern ( . named( " b0A " ) ) ) )
1554+ parseWithDelimitersTest (
1555+ #"re'\g'-1'\''"# , concat ( subpattern ( . relative( - 1 ) ) , " ' " ) )
1556+
1557+ parseWithDelimitersTest (
1558+ #"re'(?C'a*b\c 🔥_ ;')'"# , pcreCallout ( . string( #"a*b\c 🔥_ ;"# ) ) )
1559+
1560+ // Fine, because we don't end up skipping.
1561+ delimiterLexingTest ( #"re'(?'"# )
1562+ delimiterLexingTest ( #"re'(?('"# )
1563+ delimiterLexingTest ( #"re'\k'"# )
1564+ delimiterLexingTest ( #"re'\g'"# )
1565+ delimiterLexingTest ( #"re'(?C'"# )
1566+
1567+ // Not a valid group name, but we can still skip over it.
1568+ delimiterLexingTest ( #"re'(?'🔥')'"# )
1569+
1570+ // Escaped, so don't skip. These will ignore the ending `'` as we've already
1571+ // closed the literal.
1572+ parseWithDelimitersTest (
1573+ #"re'\(?''"# , zeroOrOne ( of: " ( " ) , ignoreTrailing: true
1574+ )
1575+ parseWithDelimitersTest (
1576+ #"re'\\k''"# , concat ( " \\ " , " k " ) , ignoreTrailing: true
1577+ )
1578+ parseWithDelimitersTest (
1579+ #"re'\\g''"# , concat ( " \\ " , " g " ) , ignoreTrailing: true
1580+ )
1581+ parseWithDelimitersTest (
1582+ #"re'\(?C''"# , concat ( zeroOrOne ( of: " ( " ) , " C " ) , ignoreTrailing: true
1583+ )
1584+ delimiterLexingTest ( #"re'(\?''"# , ignoreTrailing: true )
1585+ delimiterLexingTest ( #"re'\(?(''"# , ignoreTrailing: true )
1586+
15121587 // MARK: Parse not-equal
15131588
15141589 // Make sure dumping output correctly reflects differences in AST.
@@ -1815,6 +1890,12 @@ extension RegexTests {
18151890 diagnosticTest ( #"(?<#>)"# , . identifierMustBeAlphaNumeric( . groupName) )
18161891 diagnosticTest ( #"(?'1A')"# , . identifierCannotStartWithNumber( . groupName) )
18171892
1893+ // TODO: It might be better if we tried to consume up to the closing `'` and
1894+ // diagnosed an invalid group name based on that.
1895+ diagnosticTest ( #"(?'abc ')"# , . expected( " ' " ) )
1896+
1897+ diagnosticTest ( " (?'🔥') " , . identifierMustBeAlphaNumeric( . groupName) )
1898+
18181899 diagnosticTest ( #"(?'-')"# , . expectedIdentifier( . groupName) )
18191900 diagnosticTest ( #"(?'--')"# , . identifierMustBeAlphaNumeric( . groupName) )
18201901 diagnosticTest ( #"(?'a-b-c')"# , . expected( " ' " ) )
@@ -1928,13 +2009,24 @@ extension RegexTests {
19282009 }
19292010
19302011 func testDelimiterLexingErrors( ) {
2012+
2013+ // MARK: Printable ASCII
2014+
19312015 delimiterLexingDiagnosticTest ( #"re'\\#n'"# , . endOfString)
19322016 for i : UInt8 in 0x1 ..< 0x20 where i != 0xA && i != 0xD { // U+A & U+D are \n and \r.
19332017 delimiterLexingDiagnosticTest ( " re' \( UnicodeScalar ( i) ) ' " , . unprintableASCII)
19342018 }
19352019 delimiterLexingDiagnosticTest ( " re' \n ' " , . endOfString)
19362020 delimiterLexingDiagnosticTest ( " re' \r ' " , . endOfString)
19372021 delimiterLexingDiagnosticTest ( " re' \u{7F} ' " , . unprintableASCII)
2022+
2023+ // MARK: Delimiter skipping
2024+
2025+ delimiterLexingDiagnosticTest ( " re'(?'' " , . endOfString)
2026+ delimiterLexingDiagnosticTest ( " re'(?'abc' " , . endOfString)
2027+ delimiterLexingDiagnosticTest ( " re'(?('abc' " , . endOfString)
2028+ delimiterLexingDiagnosticTest ( #"re'\k'ab_c0+-'"# , . endOfString)
2029+ delimiterLexingDiagnosticTest ( #"re'\g'ab_c0+-'"# , . endOfString)
19382030 }
19392031
19402032 func testlibswiftDiagnostics( ) {
0 commit comments