@@ -21,6 +21,16 @@ API convention:
2121- eat() and tryEat() is still used by the parser as a character-by-character interface
2222*/
2323
24+ extension Error {
25+ func addingLocation( _ loc: Range < Source . Position > ) -> Error {
26+ // An error that already conforms to _LocatedErrorProtocol is returned unchanged, so its existing location is not replaced.
27+ if self is _LocatedErrorProtocol {
28+ return self
29+ }
30+ return Source . LocatedError< Self> ( self , loc) // Otherwise wrap this error together with `loc`.
31+ }
32+ }
33+
2434extension Source {
2535 // MARK: - recordLoc
2636
@@ -51,12 +61,8 @@ extension Source {
5161 do {
5262 guard let result = try f ( & self ) else { return nil }
5363 return Located ( result, start..< currentPosition)
54- } catch let e as Source . LocatedError < ParseError > {
55- throw e
56- } catch let e as ParseError {
57- throw LocatedError ( e, start..< currentPosition)
58- } catch {
59- fatalError ( " FIXME: Let's not keep the boxed existential... " )
64+ } catch let e {
65+ throw e. addingLocation ( start..< currentPosition)
6066 }
6167 }
6268
@@ -706,6 +712,22 @@ extension Source {
706712 return . init( caretLoc: nil , adding: adding, minusLoc: nil , removing: [ ] )
707713 }
708714
715+ /// Try to lex a matching option changing atom such as '(?i)' or '(?x-i)', yielding its option sequence.
716+ ///
717+ /// '(?' MatchingOptionSeq ')'
718+ /// Yields nil when the input does not start with '(?' followed by a matching option sequence; once one is lexed, the closing ')' is required via `expect`.
719+ mutating func lexChangeMatchingOptionAtom(
720+ context: ParsingContext
721+ ) throws -> AST . MatchingOptionSequence ? {
722+ try tryEating { src in
723+ guard src. tryEat ( sequence: " (? " ) ,
724+ let seq = try src. lexMatchingOptionSequence ( context: context)
725+ else { return nil }
726+ try src. expect ( " ) " )
727+ return seq
728+ }
729+ }
730+
709731 /// Try to consume explicitly spelled-out PCRE2 group syntax.
710732 mutating func lexExplicitPCRE2GroupStart( ) -> AST . Group . Kind ? {
711733 tryEating { src in
@@ -846,7 +868,7 @@ extension Source {
846868 // otherwise a matching option specifier. Conversely, '(?P' can be the
847869 // start of a matching option sequence, or a reference if it is followed
848870 // by '=' or '<'.
849- guard !src. shouldLexGroupLikeAtom ( ) else { return nil }
871+ guard !src. shouldLexGroupLikeAtom ( context : context ) else { return nil }
850872
851873 guard src. tryEat ( " ( " ) else { return nil }
852874 if src. tryEat ( " ? " ) {
@@ -871,22 +893,13 @@ extension Source {
871893
872894 // Matching option changing group (?iJmnsUxxxDPSWy{..}-iJmnsUxxxDPSW:).
873895 if let seq = try src. lexMatchingOptionSequence ( context: context) {
874- if src. tryEat ( " : " ) {
875- return . changeMatchingOptions( seq, isIsolated: false )
876- }
877- // If this isn't start of an explicit group, we should have an
878- // implicit group that covers the remaining elements of the current
879- // group.
880- // TODO: This implicit scoping behavior matches Oniguruma, but PCRE
881- // also does it across alternations, which will require additional
882- // handling.
883- guard src. tryEat ( " ) " ) else {
896+ guard src. tryEat ( " : " ) else {
884897 if let next = src. peek ( ) {
885898 throw ParseError . invalidMatchingOption ( next)
886899 }
887900 throw ParseError . expected ( " ) " )
888901 }
889- return . changeMatchingOptions( seq, isIsolated : true )
902+ return . changeMatchingOptions( seq)
890903 }
891904
892905 guard let next = src. peek ( ) else {
@@ -1035,18 +1048,8 @@ extension Source {
10351048 context: ParsingContext
10361049 ) throws -> Located < AST . Group . Kind > ? {
10371050 try tryEating { src in
1038- guard src. tryEat ( sequence: " (? " ) ,
1039- let group = try src. lexGroupStart ( context: context)
1040- else { return nil }
1041-
1042- // Implicitly scoped groups are not supported here.
1043- guard !group. value. hasImplicitScope else {
1044- throw LocatedError (
1045- ParseError . unsupportedCondition ( " implicitly scoped group " ) ,
1046- group. location
1047- )
1048- }
1049- return group
1051+ guard src. tryEat ( sequence: " (? " ) else { return nil }
1052+ return try src. lexGroupStart ( context: context)
10501053 }
10511054 }
10521055
@@ -1233,17 +1236,19 @@ extension Source {
12331236 allowWholePatternRef: Bool = false , allowRecursionLevel: Bool = false
12341237 ) throws -> AST . Reference ? {
12351238 let kind = try recordLoc { src -> AST . Reference . Kind ? in
1236- // Note this logic should match canLexNumberedReference.
1237- if src. tryEat ( " + " ) {
1238- return . relative( try src. expectNumber ( ) . value)
1239- }
1240- if src. tryEat ( " - " ) {
1241- return . relative( try - src. expectNumber ( ) . value)
1242- }
1243- if let num = try src. lexNumber ( ) {
1244- return . absolute( num. value)
1239+ try src. tryEating { src in
1240+ // Note this logic should match canLexNumberedReference.
1241+ if src. tryEat ( " + " ) , let num = try src. lexNumber ( ) {
1242+ return . relative( num. value)
1243+ }
1244+ if src. tryEat ( " - " ) , let num = try src. lexNumber ( ) {
1245+ return . relative( - num. value)
1246+ }
1247+ if let num = try src. lexNumber ( ) {
1248+ return . absolute( num. value)
1249+ }
1250+ return nil
12451251 }
1246- return nil
12471252 }
12481253 guard let kind = kind else { return nil }
12491254 guard allowWholePatternRef || kind. value != . recurseWholePattern else {
@@ -1472,8 +1477,21 @@ extension Source {
14721477 return src. canLexNumberedReference ( )
14731478 }
14741479
1480+ private func canLexMatchingOptionsAsAtom( context: ParsingContext ) -> Bool {
1481+ var src = self // Look ahead on a copy; `self` is not advanced by this check.
1482+
1483+ // Look ahead for a matching option sequence that is immediately closed by
1484+ // ')'; that form is an atom. `try?` turns both "no sequence" and a thrown
1485+ // error (invalid elements in the sequence) into nil, so we answer false and
1486+ // the input can be lexed as a group, where the invalid group kind is diagnosed.
1487+ guard ( try ? src. lexMatchingOptionSequence ( context: context) ) != nil else {
1488+ return false
1489+ }
1490+ return src. tryEat ( " ) " )
1491+ }
1492+
14751493 /// Whether a group specifier should be lexed as an atom instead of a group.
1476- private func shouldLexGroupLikeAtom( ) -> Bool {
1494+ private func shouldLexGroupLikeAtom( context : ParsingContext ) -> Bool {
14771495 var src = self
14781496 guard src. tryEat ( " ( " ) else { return false }
14791497
@@ -1487,6 +1505,9 @@ extension Source {
14871505 // The start of an Oniguruma 'of-contents' callout.
14881506 if src. tryEat ( " { " ) { return true }
14891507
1508+ // A matching option atom (?x), (?i), ...
1509+ if src. canLexMatchingOptionsAsAtom ( context: context) { return true }
1510+
14901511 return false
14911512 }
14921513 // The start of a backreference directive or Oniguruma named callout.
@@ -1747,13 +1768,20 @@ extension Source {
17471768 ///
17481769 /// GroupLikeAtom -> GroupLikeReference | Callout | BacktrackingDirective
17491770 ///
1750- mutating func expectGroupLikeAtom( ) throws -> AST . Atom . Kind {
1771+ mutating func expectGroupLikeAtom(
1772+ context: ParsingContext
1773+ ) throws -> AST . Atom . Kind {
17511774 try recordLoc { src in
17521775 // References that look like groups, e.g (?R), (?1), ...
17531776 if let ref = try src. lexGroupLikeReference ( ) {
17541777 return ref. value
17551778 }
17561779
1780+ // Change matching options atom (?i), (?x-i), ...
1781+ if let seq = try src. lexChangeMatchingOptionAtom ( context: context) {
1782+ return . changeMatchingOptions( seq)
1783+ }
1784+
17571785 // (*ACCEPT), (*FAIL), (*MARK), ...
17581786 if let b = try src. lexBacktrackingDirective ( ) {
17591787 return . backtrackingDirective( b)
@@ -1822,8 +1850,8 @@ extension Source {
18221850
18231851 // If we have group syntax that was skipped over in lexGroupStart, we
18241852 // need to handle it as an atom, or throw an error.
1825- if !customCC && src. shouldLexGroupLikeAtom ( ) {
1826- return try src. expectGroupLikeAtom ( )
1853+ if !customCC && src. shouldLexGroupLikeAtom ( context : context ) {
1854+ return try src. expectGroupLikeAtom ( context : context )
18271855 }
18281856
18291857 // A quantifier here is invalid.
@@ -1841,6 +1869,9 @@ extension Source {
18411869 }
18421870 throw Unreachable ( " TODO: reason " )
18431871
1872+ case " ( " where !customCC:
1873+ throw Unreachable ( " Should have lexed a group or group-like atom " )
1874+
18441875 // (sometimes) special metacharacters
18451876 case " . " : return customCC ? . char( " . " ) : . any
18461877 case " ^ " : return customCC ? . char( " ^ " ) : . startOfLine
0 commit comments