@@ -891,8 +891,7 @@ extension RegexTests {
891891 input: " Price: 100 dollars " , match: nil )
892892 firstMatchTest (
893893 #"(?=\d+ dollars)\d+"# ,
894- input: " Price: 100 dollars " , match: " 100 " ,
895- xfail: true ) // TODO
894+ input: " Price: 100 dollars " , match: " 100 " )
896895
897896 firstMatchTest (
898897 #"\d+(*pla: dollars)"# ,
@@ -917,6 +916,14 @@ extension RegexTests {
917916 #"\d+(*negative_lookahead: dollars)"# ,
918917 input: " Price: 100 pesos " , match: " 100 " )
919918
919+ // More complex lookaheads
920+ firstMatchTests (
921+ #"(?=.*e)(?=.*o)(?!.*z)."# ,
922+ ( input: " hello " , match: " h " ) ,
923+ ( input: " hzello " , match: " e " ) ,
924+ ( input: " hezllo " , match: nil ) ,
925+ ( input: " helloz " , match: nil ) )
926+
920927 firstMatchTest (
921928 #"(?<=USD)\d+"# , input: " Price: USD100 " , match: " 100 " , xfail: true )
922929 firstMatchTest (
@@ -1050,14 +1057,93 @@ extension RegexTests {
10501057 firstMatchTest (
10511058 #"(?:a|.b)c"# , input: " 123abcacxyz " , match: " abc " )
10521059 firstMatchTest (
1053- #"(?>a|.b)c"# , input: " 123abcacxyz " , match: " ac " , xfail : true )
1060+ #"(?>a|.b)c"# , input: " 123abcacxyz " , match: " ac " )
10541061 firstMatchTest (
1055- " (*atomic:a|.b)c " , input: " 123abcacxyz " , match: " ac " , xfail : true )
1062+ " (*atomic:a|.b)c " , input: " 123abcacxyz " , match: " ac " )
10561063 firstMatchTest (
10571064 #"(?:a+)[a-z]c"# , input: " 123aacacxyz " , match: " aac " )
10581065 firstMatchTest (
1059- #"(?>a+)[a-z]c"# , input: " 123aacacxyz " , match: " ac " , xfail: true )
1066+ #"(?>a+)[a-z]c"# , input: " 123aacacxyz " , match: nil )
1067+
1068+ // Atomicity should stay in the atomic group
1069+ firstMatchTest (
1070+ #"(?:(?>a)|.b)c"# , input: " 123abcacxyz " , match: " abc " )
1071+
1072+ // Quantifier behavior inside atomic groups
1073+
1074+ // (?:a+?) matches one 'a', then only as many more as the rest of the pattern requires
1075+ // (?>a+?) always matches exactly one 'a', since it cannot be re-entered to take more
1076+ firstMatchTests (
1077+ #"^(?:a+?)a$"# ,
1078+ ( input: " a " , match: nil ) ,
1079+ ( input: " aa " , match: " aa " ) ,
1080+ ( input: " aaa " , match: " aaa " ) )
1081+ firstMatchTests (
1082+ #"^(?>a+?)a$"# ,
1083+ ( input: " a " , match: nil ) ,
1084+ ( input: " aa " , match: " aa " ) ,
1085+ ( input: " aaa " , match: nil ) )
1086+
1087+ // (?:a?+) and (?>a?+) are equivalent: they match one 'a' if available
1088+ firstMatchTests (
1089+ #"^(?:a?+)a$"# ,
1090+ ( input: " a " , match: nil ) ,
1091+ xfail: true )
1092+ firstMatchTests (
1093+ #"^(?:a?+)a$"# ,
1094+ ( input: " aa " , match: " aa " ) ,
1095+ ( input: " aaa " , match: nil ) )
1096+ firstMatchTests (
1097+ #"^(?>a?+)a$"# ,
1098+ ( input: " a " , match: nil ) ,
1099+ ( input: " aa " , match: " aa " ) ,
1100+ ( input: " aaa " , match: nil ) )
10601101
1102+ // Capture behavior in non-atomic vs atomic groups
1103+ firstMatchTests (
1104+ #"(\d+)\w+\1"# ,
1105+ ( input: " 123x12 " , match: " 123x12 " ) , // `\w+` matches "3x" in this case
1106+ ( input: " 23x23 " , match: " 23x23 " ) ,
1107+ ( input: " 123x23 " , match: " 23x23 " ) )
1108+ firstMatchTests (
1109+ #"(?>(\d+))\w+\1"# ,
1110+ ( input: " 123x12 " , match: nil ) )
1111+ firstMatchTests (
1112+ #"(?>(\d+))\w+\1"# ,
1113+ ( input: " 23x23 " , match: " 23x23 " ) ,
1114+ ( input: " 123x23 " , match: " 23x23 " ) ,
1115+ xfail: true )
1116+
1117+ // Backreferences in lookaheads
1118+ firstMatchTests (
1119+ #"^(?=.*(.)(.)\2\1).+$"# ,
1120+ ( input: " abbba " , match: nil ) ,
1121+ ( input: " ABBA " , match: " ABBA " ) ,
1122+ ( input: " defABBAdef " , match: " defABBAdef " ) )
1123+ firstMatchTests (
1124+ #"^(?=.*(.)(.)\2\1).+\2$"# ,
1125+ ( input: " abbba " , match: nil ) ,
1126+ ( input: " ABBA " , match: nil ) ,
1127+ ( input: " defABBAdef " , match: nil ) )
1128+ // FIXME: Captures made inside a positive lookahead should be visible to later backreferences, but currently aren't
1129+ firstMatchTests (
1130+ #"^(?=.*(.)(.)\2\1).+\2$"# ,
1131+ ( input: " ABBAB " , match: " ABBAB " ) ,
1132+ ( input: " defABBAdefB " , match: " defABBAdefB " ) ,
1133+ xfail: true )
1134+
1135+ firstMatchTests (
1136+ #"^(?!.*(.)(.)\2\1).+$"# ,
1137+ ( input: " abbba " , match: " abbba " ) ,
1138+ ( input: " ABBA " , match: nil ) ,
1139+ ( input: " defABBAdef " , match: nil ) )
1140+ // Backreferences don't escape negative lookaheads;
1141+ // matching only proceeds when the lookahead fails
1142+ firstMatchTests (
1143+ #"^(?!.*(.)(.)\2\1).+\2$"# ,
1144+ ( input: " abbba " , match: nil ) ,
1145+ ( input: " abbbab " , match: nil ) ,
1146+ ( input: " ABBAB " , match: nil ) )
10611147
10621148 // TODO: Test example where non-atomic is significant
10631149 firstMatchTest (
0 commit comments