@@ -659,13 +659,61 @@ extension Source {
659659 }
660660 }
661661
/// Consume a group name that is terminated by `ending`.
///
/// The name must be non-empty, must not begin with a number, and is read as
/// a run of word characters. The returned value carries the location that
/// `recordLoc` produces for the name-consuming closure; the terminator is
/// handled afterwards, and only consumed when `eatEnding` is `true`.
///
/// - Parameters:
///   - ending: The sequence that closes the name.
///   - eatEnding: Whether `ending` is required and consumed after the name.
/// - Throws: `ParseError.expectedGroupName` if the input is empty or the
///   terminator comes first, `ParseError.groupNameCannotStartWithNumber` if
///   the first character is a number,
///   `ParseError.groupNameMustBeAlphaNumeric` if no word characters could be
///   consumed, and whatever `expect(sequence:)` throws when `eatEnding` is
///   set.
private mutating func expectGroupName(
  endingWith ending: String, eatEnding: Bool = true
) throws -> Located<String> {
  let name = try recordLoc { src -> String in
    // No input left, or the terminator comes first: there is no name at all.
    // The emptiness check short-circuits, so the terminator is only probed
    // on non-empty input.
    guard !src.isEmpty, !src.tryEat(sequence: ending) else {
      throw ParseError.expectedGroupName
    }
    // The input is known to be non-empty here, so `peek()` yields a
    // character.
    if src.peek()?.isNumber == true {
      throw ParseError.groupNameCannotStartWithNumber
    }
    if let word = src.tryEatPrefix(\.isWordCharacter)?.string {
      return word
    }
    throw ParseError.groupNameMustBeAlphaNumeric
  }
  guard eatEnding else { return name }
  try expect(sequence: ending)
  return name
}
683+
/// Consume the name field of a named group, yielding either a named capture
/// or a balanced capture.
///
///     NamedGroup    -> 'P<' GroupNameBody '>'
///                    | '<' GroupNameBody '>'
///                    | "'" GroupNameBody "'"
///     GroupNameBody -> \w+ | \w* '-' \w+
///
/// - Parameter ending: The sequence that closes the name field.
/// - Returns: `.balancedCapture` when a '-' is present, otherwise
///   `.namedCapture`.
private mutating func expectNamedGroup(
  endingWith ending: String
) throws -> AST.Group.Kind {
  // Lex the "'-' priorName ending" tail of a .NET-style balanced group.
  // Returns `nil`, consuming nothing itself, when the next character is not
  // a '-'.
  func balancedCapture(
    after name: Located<String>?
  ) throws -> AST.Group.Kind? {
    guard let dashLoc = tryEatWithLoc("-") else { return nil }
    // The default `eatEnding` applies here, so the terminator is consumed
    // together with the prior name.
    let priorName = try expectGroupName(endingWith: ending)
    return .balancedCapture(
      .init(name: name, dash: dashLoc, priorName: priorName))
  }

  // A leading '-' means a balanced capture with no name of its own.
  if let unnamed = try balancedCapture(after: nil) { return unnamed }

  // Otherwise read a name, leaving the terminator in place so a '-rhs'
  // following the name can still be recognized.
  let groupName = try expectGroupName(endingWith: ending, eatEnding: false)
  if let balanced = try balancedCapture(after: groupName) { return balanced }

  // Plain named capture: the terminator must follow directly.
  try expect(sequence: ending)
  return .namedCapture(groupName)
}
711+
662712 /// Try to consume the start of a group
663713 ///
664714 /// GroupStart -> '(?' GroupKind | '('
665- /// GroupKind -> Named | ':' | '|' | '>' | '=' | '!' | '*' | '<=' | '<!'
666- /// | '<*' | MatchingOptionSeq (':' | ')')
667- /// Named -> '<' [^'>']+ '>' | 'P<' [^'>']+ '>'
668- /// | '\'' [^'\'']+ '\''
715+ /// GroupKind -> ':' | '|' | '>' | '=' | '!' | '*' | '<=' | '<!' | '<*'
716+ /// | NamedGroup | MatchingOptionSeq (':' | ')')
669717 ///
670718 /// If `SyntaxOptions.experimentalGroups` is enabled, also accepts:
671719 ///
@@ -709,16 +757,11 @@ extension Source {
709757 if src. tryEat ( sequence: " <* " ) { return . nonAtomicLookbehind }
710758
711759 // Named
712- // TODO: Group name validation, PCRE (and ICU + Oniguruma as far as I
713- // can tell), enforce word characters only, with the first character
714- // being a non-digit.
715760 if src. tryEat ( " < " ) || src. tryEat ( sequence: " P< " ) {
716- let name = try src. expectQuoted ( endingWith: " > " )
717- return . namedCapture( name)
761+ return try src. expectNamedGroup ( endingWith: " > " )
718762 }
719763 if src. tryEat ( " ' " ) {
720- let name = try src. expectQuoted ( endingWith: " ' " )
721- return . namedCapture( name)
764+ return try src. expectNamedGroup ( endingWith: " ' " )
722765 }
723766
724767 // Matching option changing group (?iJmnsUxxxDPSWy{..}-iJmnsUxxxDPSW:).
@@ -853,9 +896,9 @@ extension Source {
853896 // FIXME: This should apply to future groups too.
854897 // TODO: We should probably advise users to use the more explicit
855898 // syntax.
856- let nameRef = try src. expectNamedReference (
857- endingWith : " ) " , eatEnding: false )
858- if context. isPriorGroupRef ( nameRef. kind) {
899+ if let nameRef = src. lexNamedReference ( endingWith : " ) " ,
900+ eatEnding: false ) ,
901+ context. isPriorGroupRef ( nameRef. kind) {
859902 return . groupMatched( nameRef)
860903 }
861904 return nil
@@ -1046,11 +1089,20 @@ extension Source {
// Consume a group name terminated by `end` and wrap it as a named
// `AST.Reference`. `eatEnding` is forwarded unchanged to the name lexer.
// In this change the name is read with `expectGroupName` (the `+` line)
// instead of `expectQuoted` (the `-` line).
10461089 private mutating func expectNamedReference(
10471090 endingWith end: String , eatEnding: Bool = true
10481091 ) throws -> AST . Reference {
1049- // TODO: Group name validation, see comment in lexGroupStart.
1050- let str = try expectQuoted ( endingWith: end, eatEnding: eatEnding)
1092+ let str = try expectGroupName ( endingWith: end, eatEnding: eatEnding)
// The reference's inner location is the location recorded for the name.
10511093 return . init( . named( str. value) , innerLoc: str. location)
10521094 }
10531095
/// Attempt to consume a named reference that runs up to the closing
/// delimiter `end`.
///
/// This is the non-throwing counterpart of `expectNamedReference`: any error
/// it throws is turned into `nil`.
///
/// - Parameters:
///   - end: The sequence that closes the reference.
///   - eatEnding: Forwarded to `expectNamedReference`.
/// - Returns: The reference, or `nil` if the characters aren't valid for a
///   named reference.
private mutating func lexNamedReference(
  endingWith end: String, eatEnding: Bool = true
) -> AST.Reference? {
  // NOTE(review): presumably `tryEating` leaves the source untouched when
  // the closure yields `nil` — confirm against its definition.
  return tryEating { attempt in
    try? attempt.expectNamedReference(endingWith: end, eatEnding: eatEnding)
  }
}
1105+
10541106 /// Try to lex a numbered reference, or otherwise a named reference.
10551107 ///
10561108 /// NameOrNumberRef -> NumberRef | <String>
0 commit comments