@@ -85,6 +85,55 @@ class RegexDSLTests: XCTestCase {
8585 == Tuple3 ( " b " , " cccc " , [ " d " , " d " , " d " ] ) )
8686 }
8787
88+ func testQuantificationWithTransformedCapture( ) throws {
89+ // Checks that the result types of transformed captures (`capture { ... }`)
90+ // are propagated from the DSL into the bytecode, and that the engine
91+ // reconstructs the right types when those captures are quantified, both
92+ // when the quantified part matches (non-empty) and when it does not (empty).
93+ enum Word : Int32 { // Local type produced by the capture transform applied to each word.
94+ case apple
95+ case orange
96+
97+ init ? ( _ string: Substring ) { // Fails (nil) for any text other than the two known words.
98+ switch string {
99+ case " apple " : self = . apple
100+ case " orange " : self = . orange
101+ default : return nil
102+ }
103+ }
104+ }
105+ let regex = Regex {
106+ " a " .+
107+ OneOrMore ( . whitespace)
108+ Optionally {
109+ OneOrMore ( . digit) . capture { Int ( $0) ! } // Transformed to Int; inside Optionally, so asserted below as Int?.
110+ }
111+ Repeat {
112+ OneOrMore ( . whitespace)
113+ OneOrMore ( . word) . capture { Word ( $0) ! } // Transformed to Word; inside Repeat, so asserted below as [Word].
114+ }
115+ }
116+ // Compile-time assertion of the inferred capture type: whole match, optional number, list of words.
117+ let _: Tuple3 < Substring , Int ? , [ Word ] > . Type
118+ = type ( of: regex) . Match. self
119+ do { // Non-empty case: a number and three words follow the leading run.
120+ let input = " aaa 123 apple orange apple "
121+ let match = input. match ( regex) ? . match. tuple
122+ let ( whole, number, words) = try XCTUnwrap ( match)
123+ XCTAssertTrue ( whole == input)
124+ XCTAssertEqual ( number, 123 )
125+ XCTAssertEqual ( words, [ . apple, . orange, . apple] )
126+ }
127+ do { // Empty case: neither a number nor any word is expected in the result.
128+ let input = " aaa "
129+ let match = input. match ( regex) ? . match. tuple
130+ let ( whole, number, words) = try XCTUnwrap ( match)
131+ XCTAssertTrue ( whole == input)
132+ XCTAssertEqual ( number, nil )
133+ XCTAssertTrue ( words. isEmpty)
134+ }
135+ }
136+
88137 // Note: Types of nested captures should be flat, but are currently nested
89138 // due to the lack of variadic generics. Without it, we cannot effectively
90139 // express type constraints to concatenate splatted tuples.
@@ -179,39 +228,46 @@ class RegexDSLTests: XCTestCase {
179228 let line = """
180229 A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
181230 """
182- let regex = Regex {
183- OneOrMore ( CharacterClass . hexDigit) . capture ( )
231+
232+ let regexWithCapture = Regex {
233+ OneOrMore ( CharacterClass . hexDigit) . capture ( Unicode . Scalar. init ( hex: ) )
184234 Optionally {
185235 " .. "
186- OneOrMore ( CharacterClass . hexDigit) . capture ( )
236+ OneOrMore ( CharacterClass . hexDigit) . capture ( Unicode . Scalar . init ( hex : ) )
187237 }
188238 OneOrMore ( CharacterClass . whitespace)
189239 " ; "
190240 OneOrMore ( CharacterClass . whitespace)
191241 OneOrMore ( CharacterClass . word) . capture ( )
192242 Repeat ( CharacterClass . any)
243+ } // Regex<(Substring, Unicode.Scalar?, Unicode.Scalar??, Substring)>
244+ do {
245+ // Assert the inferred capture type.
246+ typealias ExpectedMatch = Tuple4 <
247+ Substring , Unicode . Scalar ? , Unicode . Scalar ? ? , Substring
248+ >
249+ let _: ExpectedMatch . Type = type ( of: regexWithCapture) . Match. self
250+ let maybeMatchResult = line. match ( regexWithCapture)
251+ let matchResult = try XCTUnwrap ( maybeMatchResult)
252+ let ( wholeMatch, lower, upper, propertyString) = matchResult. match. tuple
253+ XCTAssertEqual ( wholeMatch, Substring ( line) )
254+ XCTAssertEqual ( lower, Unicode . Scalar ( 0xA6F0 ) )
255+ XCTAssertEqual ( upper, Unicode . Scalar ( 0xA6F1 ) )
256+ XCTAssertEqual ( propertyString, " Extend " )
193257 }
194- // Assert the inferred capture type.
195- typealias ExpectedMatch = Tuple4 <
196- Substring , Substring , Substring ? , Substring
197- >
198- let _: ExpectedMatch . Type = type ( of: regex) . Match. self
199- func run< R: RegexProtocol > (
200- _ regex: R
201- ) throws where R. Match == ExpectedMatch {
202- let maybeMatchResult = line. match ( regex)
258+
259+ do {
260+ let regexLiteral = try MockRegexLiteral (
261+ #"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"# ,
262+ matching: Tuple4< Substring, Substring, Substring? , Substring> . self )
263+ let maybeMatchResult = line. match ( regexLiteral)
203264 let matchResult = try XCTUnwrap ( maybeMatchResult)
204265 let ( wholeMatch, lower, upper, propertyString) = matchResult. match. tuple
205266 XCTAssertEqual ( wholeMatch, Substring ( line) )
206267 XCTAssertEqual ( lower, " A6F0 " )
207268 XCTAssertEqual ( upper, " A6F1 " )
208269 XCTAssertEqual ( propertyString, " Extend " )
209270 }
210- let regexLiteral = try MockRegexLiteral (
211- #"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"# ,
212- matching: Tuple4< Substring, Substring, Substring? , Substring> . self )
213- try run ( regex)
214- try run ( regexLiteral)
215271 }
216272
217273 func testDynamicCaptures( ) throws {
@@ -238,3 +294,13 @@ class RegexDSLTests: XCTestCase {
238294 }
239295 }
240296}
297+
extension Unicode.Scalar {
  /// Creates a scalar from the hexadecimal text of its code point.
  ///
  /// The text is parsed with `UInt32(_:radix:)` using radix 16, and the
  /// resulting value is then converted with `Unicode.Scalar(_: UInt32)`.
  ///
  /// - Parameter hex: The hexadecimal digits of the code point, e.g. `A6F0`.
  ///   Any `StringProtocol` value (such as `String` or `Substring`) is accepted.
  /// - Returns: `nil` if `hex` cannot be parsed as a base-16 `UInt32`, or if
  ///   the parsed value cannot be converted to a `Unicode.Scalar`.
  public init?<S: StringProtocol>(hex: S) {
    // Step 1: text -> numeric code point.
    guard let codePoint = UInt32(hex, radix: 16) else {
      return nil
    }
    // Step 2: numeric code point -> scalar (failable conversion).
    guard let decoded = Unicode.Scalar(codePoint) else {
      return nil
    }
    self = decoded
  }
}
0 commit comments