@@ -428,6 +428,10 @@ extension RegexTests {
428428
429429 parseTest ( " [-] " , charClass ( " - " ) )
430430
431+ // Empty character classes are forbidden, so the first ']' is literal and
432+ // this is a character class containing ']'.
433+ parseTest ( " []] " , charClass ( " ] " ) )
434+
431435 // These are metacharacters in certain contexts, but normal characters
432436 // otherwise.
433437 parseTest (
@@ -494,6 +498,25 @@ extension RegexTests {
494498 parseTest ( " [*] " , charClass ( " * " ) )
495499 parseTest ( " [{0}] " , charClass ( " { " , " 0 " , " } " ) )
496500
501+ parseTest ( #"[\f-\e]"# , charClass (
502+ range_m ( . escaped( . formfeed) , . escaped( . escape) ) ) )
503+ parseTest ( #"[\a-\b]"# , charClass (
504+ range_m ( . escaped( . alarm) , . escaped( . backspace) ) ) )
505+ parseTest ( #"[\n-\r]"# , charClass (
506+ range_m ( . escaped( . newline) , . escaped( . carriageReturn) ) ) )
507+ parseTest ( #"[\t-\t]"# , charClass (
508+ range_m ( . escaped( . tab) , . escaped( . tab) ) ) )
509+
510+ parseTest ( #"[\cX-\cY\C-A-\C-B\M-\C-A-\M-\C-B\M-A-\M-B]"# , charClass (
511+ range_m ( . keyboardControl( " X " ) , . keyboardControl( " Y " ) ) ,
512+ range_m ( . keyboardControl( " A " ) , . keyboardControl( " B " ) ) ,
513+ range_m ( . keyboardMetaControl( " A " ) , . keyboardMetaControl( " B " ) ) ,
514+ range_m ( . keyboardMeta( " A " ) , . keyboardMeta( " B " ) )
515+ ) )
516+
517+ parseTest ( #"[\N{DOLLAR SIGN}-\N{APOSTROPHE}]"# , charClass (
518+ range_m ( . namedCharacter( " DOLLAR SIGN " ) , . namedCharacter( " APOSTROPHE " ) ) ) )
519+
497520 // MARK: Operators
498521
499522 parseTest (
@@ -544,9 +567,8 @@ extension RegexTests {
544567 #"a\Q \Q \\.\Eb"# ,
545568 concat ( " a " , quote ( #" \Q \\."# ) , " b " ) )
546569
547- // These follow the PCRE behavior.
570+ // This follows the PCRE behavior.
548571 parseTest ( #"\Q\\E"# , quote ( " \\ " ) )
549- parseTest ( #"\E"# , " E " )
550572
551573 parseTest ( #"a" ."b"# , concat ( " a " , quote ( " . " ) , " b " ) ,
552574 syntax: . experimental)
@@ -566,6 +588,25 @@ extension RegexTests {
566588
567589 parseTest ( #"["-"]"# , charClass ( range_m ( " \" " , " \" " ) ) )
568590
591+ // MARK: Escapes
592+
593+ // Not metachars, but we allow their escape as ASCII.
594+ parseTest ( #"\<"# , " < " )
595+ parseTest ( #"\ "# , " " )
596+ parseTest ( #"\\"# , " \\ " )
597+
598+ // Escaped U+3000 IDEOGRAPHIC SPACE.
599+ parseTest ( #"\\#u{3000}"# , " \u{3000} " )
600+
601+ // Control and meta controls.
602+ parseTest ( #"\c "# , atom ( . keyboardControl( " " ) ) )
603+ parseTest ( #"\c!"# , atom ( . keyboardControl( " ! " ) ) )
604+ parseTest ( #"\c~"# , atom ( . keyboardControl( " ~ " ) ) )
605+ parseTest ( #"\C--"# , atom ( . keyboardControl( " - " ) ) )
606+ parseTest ( #"\M-\C-a"# , atom ( . keyboardMetaControl( " a " ) ) )
607+ parseTest ( #"\M-\C--"# , atom ( . keyboardMetaControl( " - " ) ) )
608+ parseTest ( #"\M-a"# , atom ( . keyboardMeta( " a " ) ) )
609+
569610 // MARK: Comments
570611
571612 parseTest (
@@ -989,13 +1030,6 @@ extension RegexTests {
9891030 // Backreferences are not valid in custom character classes.
9901031 parseTest ( #"[\8]"# , charClass ( " 8 " ) )
9911032 parseTest ( #"[\9]"# , charClass ( " 9 " ) )
992- parseTest ( #"[\g]"# , charClass ( " g " ) )
993- parseTest ( #"[\g+30]"# , charClass ( " g " , " + " , " 3 " , " 0 " ) )
994- parseTest ( #"[\g{1}]"# , charClass ( " g " , " { " , " 1 " , " } " ) )
995- parseTest ( #"[\k'a']"# , charClass ( " k " , " ' " , " a " , " ' " ) )
996-
997- parseTest ( #"\g"# , atom ( . char( " g " ) ) )
998- parseTest ( #"\k"# , atom ( . char( " k " ) ) )
9991033
10001034 // MARK: Character names.
10011035
@@ -1526,7 +1560,7 @@ extension RegexTests {
15261560 parseWithDelimitersTest ( " re'x*' " , zeroOrMore ( of: " x " ) )
15271561
15281562 parseWithDelimitersTest ( #"re'🔥🇩🇰'"# , concat ( " 🔥 " , " 🇩🇰 " ) )
1529- parseWithDelimitersTest ( #"re'\ 🔥✅'"# , concat ( " 🔥 " , " ✅ " ) )
1563+ parseWithDelimitersTest ( #"re'🔥✅'"# , concat ( " 🔥 " , " ✅ " ) )
15301564
15311565 // Printable ASCII characters.
15321566 delimiterLexingTest ( ##"re' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'"## )
@@ -1871,10 +1905,37 @@ extension RegexTests {
18711905 diagnosticTest ( " (?<a-b " , . expected( " > " ) )
18721906 diagnosticTest ( " (?<a-b> " , . expected( " ) " ) )
18731907
1908+ // The first ']' of a custom character class is literal, so this is missing
1909+ // the closing bracket.
1910+ diagnosticTest ( " [] " , . expected( " ] " ) )
1911+
18741912 // MARK: Bad escapes
18751913
18761914 diagnosticTest ( " \\ " , . expectedEscape)
18771915
1916+ // TODO: Custom diagnostic for control sequence
1917+ diagnosticTest ( #"\c"# , . unexpectedEndOfInput)
1918+
1919+ // TODO: Custom diagnostic for expected backref
1920+ diagnosticTest ( #"\g"# , . invalidEscape( " g " ) )
1921+ diagnosticTest ( #"\k"# , . invalidEscape( " k " ) )
1922+
1923+ // TODO: Custom diagnostic for backref in custom char class
1924+ diagnosticTest ( #"[\g]"# , . invalidEscape( " g " ) )
1925+ diagnosticTest ( #"[\g+30]"# , . invalidEscape( " g " ) )
1926+ diagnosticTest ( #"[\g{1}]"# , . invalidEscape( " g " ) )
1927+ diagnosticTest ( #"[\k'a']"# , . invalidEscape( " k " ) )
1928+
1929+ // TODO: Custom diagnostic for missing '\Q'
1930+ diagnosticTest ( #"\E"# , . invalidEscape( " E " ) )
1931+
1932+ // Non-ASCII non-whitespace cases.
1933+ diagnosticTest ( #"\🔥"# , . invalidEscape( " 🔥 " ) )
1934+ diagnosticTest ( #"\🇩🇰"# , . invalidEscape( " 🇩🇰 " ) )
1935+ diagnosticTest ( #"\e\#u{301}"# , . invalidEscape( " e \u{301} " ) )
1936+ diagnosticTest ( #"\\#u{E9}"# , . invalidEscape( " é " ) )
1937+ diagnosticTest ( #"\˂"# , . invalidEscape( " ˂ " ) )
1938+
18781939 // MARK: Text Segment options
18791940
18801941 diagnosticTest ( " (?-y{g}) " , . cannotRemoveTextSegmentOptions)
0 commit comments