@@ -608,6 +608,49 @@ extension Source {
608608 return . init( caretLoc: nil , adding: adding, minusLoc: nil , removing: [ ] )
609609 }
610610
611+ /// Try to consume explicitly spelled-out PCRE2 group syntax, e.g. '(*atomic:' or '(*positive_lookahead:', returning the corresponding group kind, or nil if '(*' is not followed by one of the recognized spellings.
612+ mutating func lexExplicitPCRE2GroupStart( ) -> AST . Group . Kind ? {
613+ tryEating { src in // NOTE(review): presumably `tryEating` restores the source position when the closure returns nil -- confirm against its definition.
614+ guard src. tryEat ( sequence: " (* " ) else { return nil } // Every explicit spelling begins with '(*'.
615+
616+ if src. tryEat ( sequence: " atomic: " ) {
617+ return . atomicNonCapturing
618+ }
619+ if src. tryEat ( sequence: " pla: " ) || // Each kind below accepts an abbreviated and a long spelling.
620+ src. tryEat ( sequence: " positive_lookahead: " ) {
621+ return . lookahead
622+ }
623+ if src. tryEat ( sequence: " nla: " ) ||
624+ src. tryEat ( sequence: " negative_lookahead: " ) {
625+ return . negativeLookahead
626+ }
627+ if src. tryEat ( sequence: " plb: " ) ||
628+ src. tryEat ( sequence: " positive_lookbehind: " ) {
629+ return . lookbehind
630+ }
631+ if src. tryEat ( sequence: " nlb: " ) ||
632+ src. tryEat ( sequence: " negative_lookbehind: " ) {
633+ return . negativeLookbehind
634+ }
635+ if src. tryEat ( sequence: " napla: " ) ||
636+ src. tryEat ( sequence: " non_atomic_positive_lookahead: " ) {
637+ return . nonAtomicLookahead
638+ }
639+ if src. tryEat ( sequence: " naplb: " ) ||
640+ src. tryEat ( sequence: " non_atomic_positive_lookbehind: " ) {
641+ return . nonAtomicLookbehind
642+ }
643+ if src. tryEat ( sequence: " sr: " ) || src. tryEat ( sequence: " script_run: " ) {
644+ return . scriptRun
645+ }
646+ if src. tryEat ( sequence: " asr: " ) ||
647+ src. tryEat ( sequence: " atomic_script_run: " ) {
648+ return . atomicScriptRun
649+ }
650+ return nil // '(*' was seen but no recognized group spelling followed; this function reports no match rather than an error.
651+ }
652+ }
653+
611654 /// Try to consume the start of a group
612655 ///
613656 /// GroupStart -> '(?' GroupKind | '('
@@ -631,6 +674,11 @@ extension Source {
631674 ) throws -> Located < AST . Group . Kind > ? {
632675 try recordLoc { src in
633676 try src. tryEating { src in
677+ // Explicitly spelled out PCRE2 syntax for some groups. This needs to be
678+ // done before group-like atoms, as it uses the '(*' syntax, which is
679+ // otherwise a group-like atom.
680+ if let g = src. lexExplicitPCRE2GroupStart ( ) { return g }
681+
634682 // There are some atoms that syntactically look like groups, bail here
635683 // if we see any. Care needs to be taken here as e.g a group starting
636684 // with '(?-' is a subpattern if the next character is a digit,
@@ -691,45 +739,6 @@ extension Source {
691739 throw ParseError . unknownGroupKind ( " ? \( next) " )
692740 }
693741
694- // Explicitly spelled out PRCE2 syntax for some groups.
695- if src. tryEat ( " * " ) {
696- if src. tryEat ( sequence: " atomic: " ) { return . atomicNonCapturing }
697-
698- if src. tryEat ( sequence: " pla: " ) ||
699- src. tryEat ( sequence: " positive_lookahead: " ) {
700- return . lookahead
701- }
702- if src. tryEat ( sequence: " nla: " ) ||
703- src. tryEat ( sequence: " negative_lookahead: " ) {
704- return . negativeLookahead
705- }
706- if src. tryEat ( sequence: " plb: " ) ||
707- src. tryEat ( sequence: " positive_lookbehind: " ) {
708- return . lookbehind
709- }
710- if src. tryEat ( sequence: " nlb: " ) ||
711- src. tryEat ( sequence: " negative_lookbehind: " ) {
712- return . negativeLookbehind
713- }
714- if src. tryEat ( sequence: " napla: " ) ||
715- src. tryEat ( sequence: " non_atomic_positive_lookahead: " ) {
716- return . nonAtomicLookahead
717- }
718- if src. tryEat ( sequence: " naplb: " ) ||
719- src. tryEat ( sequence: " non_atomic_positive_lookbehind: " ) {
720- return . nonAtomicLookbehind
721- }
722- if src. tryEat ( sequence: " sr: " ) || src. tryEat ( sequence: " script_run: " ) {
723- return . scriptRun
724- }
725- if src. tryEat ( sequence: " asr: " ) ||
726- src. tryEat ( sequence: " atomic_script_run: " ) {
727- return . atomicScriptRun
728- }
729-
730- throw ParseError . misc ( " Quantifier '*' must follow operand " )
731- }
732-
733742 // (_:)
734743 if src. experimentalCaptures && src. tryEat ( sequence: " _: " ) {
735744 return . nonCapture
@@ -1216,6 +1225,8 @@ extension Source {
12161225
12171226 return false
12181227 }
1228+ // The start of a backtracking directive.
1229+ if src. tryEat ( " * " ) { return true }
12191230
12201231 return false
12211232 }
@@ -1323,6 +1334,44 @@ extension Source {
13231334 return . init( arg)
13241335 }
13251336
1337+ /// Try to consume a backtracking directive such as '(*ACCEPT)' or '(*MARK:name)'.
1338+ ///
1339+ /// BacktrackingDirective -> '(*' BacktrackingDirectiveKind (':' <String>)? ')'
1340+ /// BacktrackingDirectiveKind -> 'ACCEPT' | 'FAIL' | 'F' | 'MARK' | ''
1341+ /// | 'COMMIT' | 'PRUNE' | 'SKIP' | 'THEN'
1342+ /// Returns nil if the input does not start with '(*' followed by one of the directive kinds above. Throws if a MARK directive has no name.
1343+ mutating func lexBacktrackingDirective(
1344+ ) throws -> AST . Atom . BacktrackingDirective ? {
1345+ try tryEating { src in // NOTE(review): presumably `tryEating` restores the source position when the closure returns nil -- confirm against its definition.
1346+ guard src. tryEat ( sequence: " (* " ) else { return nil } // Not a '(*' sequence, so not a directive.
1347+ let kind = src. recordLoc { src -> AST . Atom . BacktrackingDirective . Kind ? in // The kind's location is kept so its source spelling can be reported in the error below.
1348+ if src. tryEat ( sequence: " ACCEPT " ) { return . accept }
1349+ if src. tryEat ( sequence: " FAIL " ) || src. tryEat ( " F " ) { return . fail } // 'FAIL' is tried before its abbreviation 'F'.
1350+ if src. tryEat ( sequence: " MARK " ) || src. peek ( ) == " : " { return . mark } // An empty kind followed by ':' also denotes MARK; the ':' is only peeked so the name parsing below consumes it.
1351+ if src. tryEat ( sequence: " COMMIT " ) { return . commit }
1352+ if src. tryEat ( sequence: " PRUNE " ) { return . prune }
1353+ if src. tryEat ( sequence: " SKIP " ) { return . skip }
1354+ if src. tryEat ( sequence: " THEN " ) { return . then }
1355+ return nil
1356+ }
1357+ guard let kind = kind else { return nil } // No recognized directive kind after '(*'.
1358+ var name : Located < String > ?
1359+ if src. tryEat ( " : " ) {
1360+ // TODO: PCRE allows escaped delimiters or '\Q...\E' sequences in the
1361+ // name under PCRE2_ALT_VERBNAMES.
1362+ name = try src. expectQuoted ( endingWith: " ) " , eatEnding: false ) // The closing ')' is not eaten here (eatEnding: false); the `expect` below handles it.
1363+ }
1364+ try src. expect ( " ) " )
1365+
1366+ // MARK directives must be named.
1367+ if name == nil && kind. value == . mark {
1368+ throw ParseError . backtrackingDirectiveMustHaveName ( // The error carries the kind as it was spelled in the source.
1369+ String ( src [ kind. location. range] ) )
1370+ }
1371+ return . init( kind, name: name)
1372+ }
1373+ }
1374+
13261375 /// Consume a group-like atom, throwing an error if an atom could not be
13271376 /// produced.
13281377 ///
@@ -1340,6 +1389,11 @@ extension Source {
13401389 return . callout( callout)
13411390 }
13421391
1392+ // (*ACCEPT), (*FAIL), (*MARK), ...
1393+ if let b = try src. lexBacktrackingDirective ( ) {
1394+ return . backtrackingDirective( b)
1395+ }
1396+
13431397 // If we didn't produce an atom, consume up until a reasonable end-point
13441398 // and throw an error.
13451399 try src. expect ( " ( " )
0 commit comments