@@ -80,6 +80,55 @@ class RegexDSLTests: XCTestCase {
8080 == Tuple3 ( " b " , " cccc " , [ " d " , " d " , " d " ] ) )
8181 }
8282
/// Checks that captures with a transform closure keep their transformed type
/// when they are nested inside quantifiers, for both a populated and an empty
/// quantification.
func testQuantificationWithTransformedCapture() throws {
  // This test makes sure transformed capture type information is correctly
  // propagated from the DSL into the bytecode, and that the engine
  // reconstructs the right types upon quantification (both empty and
  // non-empty).
  enum Word: Int32 {
    case apple
    case orange

    /// Maps the captured text onto a case; any other text yields `nil`.
    init?(_ string: Substring) {
      switch string {
      case "apple": self = .apple
      case "orange": self = .orange
      default: return nil
      }
    }
  }

  let regex = Regex {
    "a".+
    OneOrMore(.whitespace)
    Optionally {
      // The transform returns a non-optional `Int`; the `Int?` in the expected
      // match type below comes from `Optionally` alone.
      OneOrMore(.digit).capture { Int($0)! }
    }
    Repeat {
      OneOrMore(.whitespace)
      // Likewise non-optional: the `[Word]` in the expected match type comes
      // from `Repeat`.
      OneOrMore(.word).capture { Word($0)! }
    }
  }

  // Assert the inferred capture type at compile time.
  let _: Tuple3<Substring, Int?, [Word]>.Type
    = type(of: regex).Match.self

  // Non-empty quantification: the number and every word are present.
  do {
    let input = "aaa 123 apple orange apple"
    let match = input.match(regex)?.match.tuple
    let (whole, number, words) = try XCTUnwrap(match)
    XCTAssertEqual(whole, Substring(input))
    XCTAssertEqual(number, 123)
    XCTAssertEqual(words, [.apple, .orange, .apple])
  }

  // Empty quantification: the optional capture is absent and the repeated
  // capture matched zero times. The trailing space is required because the
  // regex demands at least one whitespace character after the run of "a"s.
  do {
    let input = "aaa "
    let match = input.match(regex)?.match.tuple
    let (whole, number, words) = try XCTUnwrap(match)
    XCTAssertEqual(whole, Substring(input))
    XCTAssertNil(number)
    XCTAssertEqual(words, [])
  }
}
131+
83132 // Note: Types of nested captures should be flat, but are currently nested
84133 // due to the lack of variadic generics. Without it, we cannot effectively
85134 // express type constraints to concatenate splatted tuples.
@@ -174,39 +223,46 @@ class RegexDSLTests: XCTestCase {
174223 let line = """
175224 A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
176225 """
177- let regex = Regex {
178- OneOrMore ( CharacterClass . hexDigit) . capture ( )
226+
227+ let regexWithCapture = Regex {
228+ OneOrMore ( CharacterClass . hexDigit) . capture ( Unicode . Scalar. init ( hex: ) )
179229 Optionally {
180230 " .. "
181- OneOrMore ( CharacterClass . hexDigit) . capture ( )
231+ OneOrMore ( CharacterClass . hexDigit) . capture ( Unicode . Scalar . init ( hex : ) )
182232 }
183233 OneOrMore ( CharacterClass . whitespace)
184234 " ; "
185235 OneOrMore ( CharacterClass . whitespace)
186236 OneOrMore ( CharacterClass . word) . capture ( )
187237 Repeat ( CharacterClass . any)
238+ } // Regex<(Substring, Unicode.Scalar?, Unicode.Scalar??, Substring)>
239+ do {
240+ // Assert the inferred capture type.
241+ typealias ExpectedMatch = Tuple4 <
242+ Substring , Unicode . Scalar ? , Unicode . Scalar ? ? , Substring
243+ >
244+ let _: ExpectedMatch . Type = type ( of: regexWithCapture) . Match. self
245+ let maybeMatchResult = line. match ( regexWithCapture)
246+ let matchResult = try XCTUnwrap ( maybeMatchResult)
247+ let ( wholeMatch, lower, upper, propertyString) = matchResult. match. tuple
248+ XCTAssertEqual ( wholeMatch, Substring ( line) )
249+ XCTAssertEqual ( lower, Unicode . Scalar ( 0xA6F0 ) )
250+ XCTAssertEqual ( upper, Unicode . Scalar ( 0xA6F1 ) )
251+ XCTAssertEqual ( propertyString, " Extend " )
188252 }
189- // Assert the inferred capture type.
190- typealias ExpectedMatch = Tuple4 <
191- Substring , Substring , Substring ? , Substring
192- >
193- let _: ExpectedMatch . Type = type ( of: regex) . Match. self
194- func run< R: RegexProtocol > (
195- _ regex: R
196- ) throws where R. Match == ExpectedMatch {
197- let maybeMatchResult = line. match ( regex)
253+
254+ do {
255+ let regexLiteral = try MockRegexLiteral (
256+ #"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"# ,
257+ matching: Tuple4< Substring, Substring, Substring? , Substring> . self )
258+ let maybeMatchResult = line. match ( regexLiteral)
198259 let matchResult = try XCTUnwrap ( maybeMatchResult)
199260 let ( wholeMatch, lower, upper, propertyString) = matchResult. match. tuple
200261 XCTAssertEqual ( wholeMatch, Substring ( line) )
201262 XCTAssertEqual ( lower, " A6F0 " )
202263 XCTAssertEqual ( upper, " A6F1 " )
203264 XCTAssertEqual ( propertyString, " Extend " )
204265 }
205- let regexLiteral = try MockRegexLiteral (
206- #"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"# ,
207- matching: Tuple4< Substring, Substring, Substring? , Substring> . self )
208- try run ( regex)
209- try run ( regexLiteral)
210266 }
211267
212268 func testDynamicCaptures( ) throws {
@@ -233,3 +289,13 @@ class RegexDSLTests: XCTestCase {
233289 }
234290 }
235291}
292+
extension Unicode.Scalar {
  /// Creates a scalar from the hexadecimal text of its code point.
  ///
  /// - Parameter hex: The text to parse as a base-16 `UInt32`.
  /// - Returns: `nil` when `hex` cannot be parsed as a base-16 `UInt32`, or
  ///   when the parsed value is rejected by `Unicode.Scalar.init(_: UInt32)`.
  public init?<S: StringProtocol>(hex: S) {
    // Step 1: text -> numeric code point.
    guard let codePoint = UInt32(hex, radix: 16) else {
      return nil
    }
    // Step 2: numeric code point -> scalar.
    guard let decoded = Unicode.Scalar(codePoint) else {
      return nil
    }
    self = decoded
  }
}
0 commit comments