@@ -31,6 +31,12 @@ extension AST {
3131 /// \u{...}, \0dd, \x{...}, ...
3232 case scalar( Scalar )
3333
34+ /// A whitespace-separated sequence of Unicode scalar values which are
35+ /// implicitly splatted out.
36+ ///
37+ /// `\u{A B C}` -> `\u{A}\u{B}\u{C}`
38+ case scalarSequence( ScalarSequence )
39+
3440 /// A Unicode property, category, or script, including those written using
3541 /// POSIX syntax.
3642 ///
@@ -84,6 +90,7 @@ extension AST.Atom {
8490 switch kind {
8591 case . char( let v) : return v
8692 case . scalar( let v) : return v
93+ case . scalarSequence( let v) : return v
8794 case . property( let v) : return v
8895 case . escaped( let v) : return v
8996 case . keyboardControl( let v) : return v
@@ -116,6 +123,18 @@ extension AST.Atom {
116123 self . location = location
117124 }
118125 }
126+
/// A sequence of more than one scalar atom, stored together with its trivia.
public struct ScalarSequence: Hashable {
  /// The scalar atoms that make up the sequence, in order.
  public var scalars: [Scalar]

  /// The trivia stored alongside the scalars.
  public var trivia: [AST.Trivia]

  /// The `value` of every element of `scalars`, in the same order.
  public var scalarValues: [Unicode.Scalar] {
    scalars.map { $0.value }
  }

  /// Creates a scalar sequence from the given scalars and trivia.
  ///
  /// - Parameters:
  ///   - scalars: The scalar atoms of the sequence.
  ///   - trivia: The trivia to store with the sequence.
  /// - Precondition: `scalars` contains more than one element.
  public init(_ scalars: [Scalar], trivia: [AST.Trivia]) {
    // A single scalar is represented by the plain `.scalar` atom kind, so a
    // sequence is only valid with two or more elements.
    precondition(scalars.count >= 2, " Expected multiple scalars ")
    self.scalars = scalars
    self.trivia = trivia
  }
}
119138}
120139
121140extension AST . Atom {
@@ -725,8 +744,9 @@ extension AST.Atom {
725744 // the AST? Or defer for the matching engine?
726745 return nil
727746
728- case . property, . any, . startOfLine, . endOfLine, . backreference, . subpattern,
729- . callout, . backtrackingDirective, . changeMatchingOptions:
747+ case . scalarSequence, . property, . any, . startOfLine, . endOfLine,
748+ . backreference, . subpattern, . callout, . backtrackingDirective,
749+ . changeMatchingOptions:
730750 return nil
731751 }
732752 }
@@ -748,13 +768,21 @@ extension AST.Atom {
748768 /// A string literal representation of the atom, if possible.
749769 ///
750770 /// Individual characters are returned as-is, and Unicode scalars are
751- /// presented using "\u{nnnn }" syntax.
771+ /// presented using "\u{nn nn ... }" syntax.
752772 public var literalStringValue : String ? {
/// Renders Unicode scalars using `\u{...}` syntax.
///
/// Each scalar is written as its uppercase hexadecimal value, and the values
/// are separated by single spaces inside one pair of braces, e.g. `\u{41 42}`.
///
/// - Parameter scalars: The scalars to render, in output order.
/// - Returns: The `\u{...}` spelling of `scalars`; `\u{}` when it is empty.
func scalarLiteral(_ scalars: [UnicodeScalar]) -> String {
  let hexValues = scalars.map { scalar in
    String(scalar.value, radix: 16, uppercase: true)
  }
  // No padding around the backslash or braces: the result must read as the
  // documented `\u{nn nn ...}` form.
  return "\\u{" + hexValues.joined(separator: " ") + "}"
}
753778 switch kind {
754779 case . char( let c) :
755780 return String ( c)
756781 case . scalar( let s) :
757- return " \\ u{ \( String ( s. value. value, radix: 16 , uppercase: true ) ) } "
782+ return scalarLiteral ( [ s. value] )
783+
784+ case . scalarSequence( let s) :
785+ return scalarLiteral ( s. scalarValues)
758786
759787 case . keyboardControl( let x) :
760788 return " \\ C- \( x) "
0 commit comments