@@ -329,21 +329,23 @@ extension RegexTests {
329329 parseTest ( #"\070"# , scalar ( " \u{38} " ) )
330330 parseTest ( #"\07A"# , concat ( scalar ( " \u{7} " ) , " A " ) )
331331 parseTest ( #"\08"# , concat ( scalar ( " \u{0} " ) , " 8 " ) )
332- parseTest ( #"\0707"# , concat ( scalar ( " \u{38} " ) , " 7 " ) )
332+ parseTest ( #"\0707"# , scalar ( " \u{1C7} " ) )
333333
334334 parseTest ( #"[\0]"# , charClass ( scalar_m ( " \u{0} " ) ) )
335335 parseTest ( #"[\01]"# , charClass ( scalar_m ( " \u{1} " ) ) )
336336 parseTest ( #"[\070]"# , charClass ( scalar_m ( " \u{38} " ) ) )
337337
338338 parseTest ( #"[\07A]"# , charClass ( scalar_m ( " \u{7} " ) , " A " ) )
339339 parseTest ( #"[\08]"# , charClass ( scalar_m ( " \u{0} " ) , " 8 " ) )
340- parseTest ( #"[\0707]"# , charClass ( scalar_m ( " \u{38 } " ) , " 7 " ) )
340+ parseTest ( #"[\0707]"# , charClass ( scalar_m ( " \u{1C7 } " ) ) )
341341
342- parseTest ( #"[\1]"# , charClass ( scalar_m ( " \u{1} " ) ) )
343- parseTest ( #"[\123]"# , charClass ( scalar_m ( " \u{53} " ) ) )
344- parseTest ( #"[\101]"# , charClass ( scalar_m ( " \u{41} " ) ) )
345- parseTest ( #"[\7777]"# , charClass ( scalar_m ( " \u{1FF} " ) , " 7 " ) )
346- parseTest ( #"[\181]"# , charClass ( scalar_m ( " \u{1} " ) , " 8 " , " 1 " ) )
342+ // TODO: PCRE treats these as octal sequences; we should warn and
343+ // suggest that the user prefix them with 0.
344+ parseTest ( #"[\1]"# , charClass ( " 1 " ) )
345+ parseTest ( #"[\123]"# , charClass ( " 1 " , " 2 " , " 3 " ) )
346+ parseTest ( #"[\101]"# , charClass ( " 1 " , " 0 " , " 1 " ) )
347+ parseTest ( #"[\7777]"# , charClass ( " 7 " , " 7 " , " 7 " , " 7 " ) )
348+ parseTest ( #"[\181]"# , charClass ( " 1 " , " 8 " , " 1 " ) )
347349
348350 // We take *up to* the first two valid digits for \x. No valid digits is 0.
349351 parseTest ( #"\x"# , scalar ( " \u{0} " ) )
@@ -492,6 +494,10 @@ extension RegexTests {
492494 #"a\Q \Q \\.\Eb"# ,
493495 concat ( " a " , quote ( #" \Q \\."# ) , " b " ) )
494496
497+ // These follow the PCRE behavior.
498+ parseTest ( #"\Q\\E"# , quote ( " \\ " ) )
499+ parseTest ( #"\E"# , " E " )
500+
495501 parseTest ( #"a" ."b"# , concat ( " a " , quote ( " . " ) , " b " ) ,
496502 syntax: . experimental)
497503 parseTest ( #"a" .""b""# , concat ( " a " , quote ( " . " ) , quote ( " b " ) ) ,
@@ -797,11 +803,9 @@ extension RegexTests {
797803 )
798804 }
799805
800- // TODO: Some of these behaviors are unintuitive, we should likely warn on
801- // some of them.
802- parseTest ( #"\10"# , scalar ( " \u{8} " ) )
803- parseTest ( #"\18"# , concat ( scalar ( " \u{1} " ) , " 8 " ) )
804- parseTest ( #"\7777"# , concat ( scalar ( " \u{1FF} " ) , " 7 " ) )
806+ parseTest ( #"\10"# , backreference ( . absolute( 10 ) ) )
807+ parseTest ( #"\18"# , backreference ( . absolute( 18 ) ) )
808+ parseTest ( #"\7777"# , backreference ( . absolute( 7777 ) ) )
805809 parseTest ( #"\91"# , backreference ( . absolute( 91 ) ) )
806810
807811 parseTest (
@@ -813,21 +817,22 @@ extension RegexTests {
813817 parseTest (
814818 #"()()()()()()()()()\10()"# ,
815819 concat ( Array ( repeating: capture ( empty ( ) ) , count: 9 )
816- + [ scalar ( " \u{8} " ) , capture ( empty ( ) ) ] ) ,
820+ + [ backreference ( . absolute ( 10 ) ) , capture ( empty ( ) ) ] ) ,
817821 captures: . tuple( Array ( repeating: . atom( ) , count: 10 ) )
818822 )
819- parseTest ( #"()()\10"# ,
820- concat ( capture ( empty ( ) ) , capture ( empty ( ) ) , scalar ( " \u{8} " ) ) ,
821- captures: . tuple( . atom( ) , . atom( ) ) )
823+ parseTest ( #"()()\10"# , concat (
824+ capture ( empty ( ) ) , capture ( empty ( ) ) , backreference ( . absolute( 10 ) ) ) ,
825+ captures: . tuple( . atom( ) , . atom( ) )
826+ )
822827
823828 // A capture of three empty captures.
824829 let fourCaptures = capture (
825830 concat ( capture ( empty ( ) ) , capture ( empty ( ) ) , capture ( empty ( ) ) )
826831 )
827832 parseTest (
828833 // There are 9 capture groups in total here.
829- #"((()()())(()()()))\10"# ,
830- concat ( capture ( concat ( fourCaptures, fourCaptures) ) , scalar ( " \u{8} " ) ) ,
834+ #"((()()())(()()()))\10"# , concat ( capture ( concat (
835+ fourCaptures, fourCaptures) ) , backreference ( . absolute ( 10 ) ) ) ,
831836 captures: . tuple( Array ( repeating: . atom( ) , count: 9 ) )
832837 )
833838 parseTest (
@@ -852,7 +857,7 @@ extension RegexTests {
852857 concat ( Array ( repeating: capture ( empty ( ) ) , count: 40 ) + [ scalar ( " " ) ] ) ,
853858 captures: . tuple( Array ( repeating: . atom( ) , count: 40 ) )
854859 )
855- parseTest ( #"\40"# , scalar ( " " ) )
860+ parseTest ( #"\40"# , backreference ( . absolute ( 40 ) ) )
856861 parseTest (
857862 String ( repeating: " () " , count: 40 ) + #"\40"# ,
858863 concat ( Array ( repeating: capture ( empty ( ) ) , count: 40 )
@@ -862,7 +867,7 @@ extension RegexTests {
862867
863868 parseTest ( #"\7"# , backreference ( . absolute( 7 ) ) )
864869
865- parseTest ( #"\11"# , scalar ( " \u{9} " ) )
870+ parseTest ( #"\11"# , backreference ( . absolute ( 11 ) ) )
866871 parseTest (
867872 String ( repeating: " () " , count: 11 ) + #"\11"# ,
868873 concat ( Array ( repeating: capture ( empty ( ) ) , count: 11 )
@@ -876,12 +881,11 @@ extension RegexTests {
876881 captures: . tuple( Array ( repeating: . atom( ) , count: 11 ) )
877882 )
878883
879- parseTest ( #"\0113"# , concat ( scalar ( " \u{9} " ) , " 3 " ) )
880- parseTest ( #"\113"# , scalar ( " \u{4B} " ) )
881- parseTest ( #"\377"# , scalar ( " \u{FF} " ) )
884+ parseTest ( #"\0113"# , scalar ( " \u{4B} " ) )
885+ parseTest ( #"\113"# , backreference ( . absolute ( 113 ) ) )
886+ parseTest ( #"\377"# , backreference ( . absolute ( 377 ) ) )
882887 parseTest ( #"\81"# , backreference ( . absolute( 81 ) ) )
883888
884-
885889 parseTest ( #"\g1"# , backreference ( . absolute( 1 ) ) )
886890 parseTest ( #"\g001"# , backreference ( . absolute( 1 ) ) )
887891 parseTest ( #"\g52"# , backreference ( . absolute( 52 ) ) )
0 commit comments