@@ -631,8 +631,15 @@ extension Source {
631631 ) throws -> Located < AST . Group . Kind > ? {
632632 try recordLoc { src in
633633 try src. tryEating { src in
634- guard src. tryEat ( " ( " ) else { return nil }
634+ // There are some atoms that syntactically look like groups, bail here
635+ // if we see any. Care needs to be taken here as e.g a group starting
636+ // with '(?-' is a subpattern if the next character is a digit,
637+ // otherwise a matching option specifier. Conversely, '(?P' can be the
638+ // start of a matching option sequence, or a reference if it is followed
639+ // by '=' or '<'.
640+ guard !src. shouldLexGroupLikeAtom ( ) else { return nil }
635641
642+ guard src. tryEat ( " ( " ) else { return nil }
636643 if src. tryEat ( " ? " ) {
637644 if src. tryEat ( " : " ) { return . nonCapture }
638645 if src. tryEat ( " | " ) { return . nonCaptureReset }
@@ -658,15 +665,6 @@ extension Source {
658665 return . namedCapture( name)
659666 }
660667
661- // Check if we can lex a group-like reference. Do this before matching
662- // options to avoid ambiguity with a group starting with (?-, which
663- // is a subpattern if the next character is a digit, otherwise a
664- // matching option specifier. In addition, we need to be careful with
665- // (?P, which can also be the start of a matching option sequence.
666- if src. canLexGroupLikeReference ( ) {
667- return nil
668- }
669-
670668 // Matching option changing group (?iJmnsUxxxDPSWy{..}-iJmnsUxxxDPSW:).
671669 if let seq = try src. lexMatchingOptionSequence ( ) {
672670 if src. tryEat ( " : " ) {
@@ -1059,11 +1057,11 @@ extension Source {
10591057 for openChar: Character
10601058 ) -> Character {
10611059 switch openChar {
1060+ // Identically-balanced delimiters.
1061+ case " ' " , " \" " , " ` " , " ^ " , " % " , " # " , " $ " : return openChar
10621062 case " < " : return " > "
1063- case " ' " : return " ' "
10641063 case " { " : return " } "
1065- default :
1066- fatalError ( " Not implemented " )
1064+ default : fatalError ( " Not implemented " )
10671065 }
10681066 }
10691067
@@ -1204,6 +1202,24 @@ extension Source {
12041202 return src. canLexNumberedReference ( )
12051203 }
12061204
/// Reports whether the upcoming group-looking syntax should be lexed as an
/// atom rather than as a group.
///
/// Works on a copy of the source, so `self` is not advanced.
///
/// - Returns: `true` if the input starts with '(?' and what follows is
///   either the start of a group-like reference or a 'C' (the start of a
///   callout); `false` otherwise.
private func shouldLexGroupLikeAtom() -> Bool {
  var lookahead = self

  // Every group-like atom checked for here opens with '(?'.
  guard lookahead.tryEat("("), lookahead.tryEat("?") else { return false }

  // The start of a reference: '(?P=', '(?R', ...
  if lookahead.canLexGroupLikeReference() { return true }

  // Otherwise only the start of a callout, '(?C', qualifies.
  return lookahead.tryEat("C")
}
1222+
12071223 /// Consume an escaped atom, starting from after the backslash
12081224 ///
12091225 /// Escaped -> KeyboardModified | Builtin
@@ -1265,6 +1281,78 @@ extension Source {
12651281 }
12661282 }
12671283
/// Try to consume a callout.
///
///     Callout     -> '(?C' CalloutBody ')'
///     CalloutBody -> '' | <Number>
///                  | '`' <String> '`'
///                  | "'" <String> "'"
///                  | '"' <String> '"'
///                  | '^' <String> '^'
///                  | '%' <String> '%'
///                  | '#' <String> '#'
///                  | '$' <String> '$'
///                  | '{' <String> '}'
///
/// - Returns: The lexed callout, or `nil` if the input does not start with
///   '(?C'.
/// - Throws: `ParseError.unknownCalloutKind` if the body is neither a number,
///   empty, nor a delimited string; `ParseError.expected(")")` if nothing is
///   left to consume for such a body. Errors thrown by the helpers called
///   here are propagated.
mutating func lexCallout() throws -> AST.Atom.Callout? {
  guard tryEat(sequence: "(?C") else { return nil }

  let argument = try recordLoc { body -> AST.Atom.Callout.Argument in
    // A numeric argument, e.g. '(?C12)'.
    if let number = try body.lexNumber() {
      return .number(number.value)
    }

    // An empty body, '(?C)', is implicitly '(?C0)'.
    if body.peek() == ")" {
      return .number(0)
    }

    // '(?C' followed by a string wrapped in one of the balanced delimiter
    // pairs defined by
    // http://pcre.org/current/doc/html/pcre2pattern.html#SEC28
    if let opener = body.tryEat(anyOf: "`", "'", "\"", "^", "%", "#", "$", "{") {
      let closer = Source.getClosingDelimiter(for: opener)
      let quoted = try body.expectQuoted(endingWith: String(closer))
      return .string(quoted.value)
    }

    // Unrecognized syntax: consume up to the ending ')' so the error can
    // quote the offending text.
    let unknown = body.lexUntil { $0.isEmpty || $0.tryEat(")") }.value
    guard !unknown.isEmpty else {
      throw ParseError.expected(")")
    }
    throw ParseError.unknownCalloutKind("(?C\(unknown))")
  }

  try expect(")")
  return .init(argument)
}
1325+
/// Consume a group-like atom, throwing an error if an atom could not be
/// produced.
///
///     GroupLikeAtom -> GroupLikeReference | Callout | BacktrackingDirective
///
/// NOTE(review): `BacktrackingDirective` appears in the grammar above, but
/// this function only attempts a group-like reference and a callout —
/// confirm whether directives are handled elsewhere or still pending.
///
/// - Returns: The kind of the atom that was lexed.
/// - Throws: `ParseError.unknownGroupKind` with the text consumed after the
///   opening '(' when neither a reference nor a callout matches, or
///   `ParseError.expected(")")` when no such text could be consumed. Errors
///   thrown by the helpers called here are propagated.
mutating func expectGroupLikeAtom() throws -> AST.Atom.Kind {
  return try recordLoc { lexer -> AST.Atom.Kind in
    // References that look like groups, e.g. (?R), (?1), ...
    if let reference = try lexer.lexGroupLikeReference() {
      return reference.value
    }

    // Callouts: (?C...)
    if let callout = try lexer.lexCallout() {
      return .callout(callout)
    }

    // No atom was produced. Consume up until a reasonable end-point (a ':'
    // or ')') and throw an error.
    try lexer.expect("(")
    let unknown = lexer.lexUntil { rest in
      rest.isEmpty || rest.tryEat(anyOf: ":", ")") != nil
    }.value
    guard !unknown.isEmpty else {
      throw ParseError.expected(")")
    }
    throw ParseError.unknownGroupKind(unknown)
  }.value
}
1355+
12681356
12691357 /// Try to consume an Atom.
12701358 ///
@@ -1293,9 +1381,10 @@ extension Source {
12931381 return . property( prop)
12941382 }
12951383
1296- // References that look like groups, e.g (?R), (?1), ...
1297- if !customCC, let ref = try src. lexGroupLikeReference ( ) {
1298- return ref. value
1384+ // If we have group syntax that was skipped over in lexGroupStart, we
1385+ // need to handle it as an atom, or throw an error.
1386+ if !customCC && src. shouldLexGroupLikeAtom ( ) {
1387+ return try src. expectGroupLikeAtom ( )
12991388 }
13001389
13011390 let char = src. eat ( )
0 commit comments