@@ -22,6 +22,14 @@ import XCTest
2222@testable // for internal `matches(of:)`
2323import _StringProcessing
2424
extension UnicodeScalar {
  /// The scalar's numeric value rendered as uppercase hexadecimal,
  /// left-padded with zeros to a minimum width of four digits
  /// (for example, U+0041 yields "0041" and U+0308 yields "0308").
  ///
  /// Values that need more than four hexadecimal digits are returned
  /// unpadded and untruncated (for example, U+1F600 yields "1F600").
  var value4Digits: String {
    let hexDigits = String(value, radix: 16, uppercase: true)
    // Already at least four digits wide: nothing to pad.
    guard hexDigits.count < 4 else { return hexDigits }
    // The padding unit must be the single character "0"; any surrounding
    // whitespace in the literal would leak into the result and break the
    // four-digit width.
    let padding = String(repeating: "0", count: 4 - hexDigits.count)
    return padding + hexDigits
  }
}
32+
2533class UTS18Tests : XCTestCase {
2634 var input : String {
2735 " ABCdefghîøu \u{308} \u{FFF0} -–—[]123 "
@@ -262,21 +270,33 @@ extension UTS18Tests {
262270 09 \u{85} \
263271 10 \u{2028} \
264272 11 \u{2029} \
265-
273+ 12
266274 """
267275 // Check the input counts
268276 var lines = lineInput. matches ( of: regex ( #"\d{2}"# ) )
269- XCTAssertEqual ( lines. count, 11 )
277+ XCTAssertEqual ( lines. count, 12 )
270278 // Test \R - newline sequence
271- lines = lineInput. matches ( of: regex ( #"\d{2}\R"# ) )
279+ lines = lineInput. matches ( of: regex ( #"\d{2}\R^"# ) . anchorsMatchLineEndings ( ) )
280+ XCTAssertEqual ( lines. count, 11 )
281+ // Test \v - vertical space
282+ lines = lineInput. matches ( of: regex ( #"\d{2}\v^"# ) . anchorsMatchLineEndings ( ) )
272283 XCTAssertEqual ( lines. count, 11 )
273284 // Test anchors as line boundaries
274285 lines = lineInput. matches ( of: regex ( #"^\d{2}$"# ) . anchorsMatchLineEndings ( ) )
275- XCTAssertEqual ( lines. count, 11 )
286+ XCTAssertEqual ( lines. count, 12 )
276287 // Test that dot does not match line endings
277288 lines = lineInput. matches ( of: regex ( #".+"# ) )
278- XCTAssertEqual ( lines. count, 11 )
289+ XCTAssertEqual ( lines. count, 12 )
279290
291+ // Unicode scalar semantics - \R still matches all, including \r\n sequence
292+ lines = lineInput. matches (
293+ of: regex ( #"\d{2}\R(?=\d)"# ) . matchingSemantics ( . unicodeScalar) . anchorsMatchLineEndings ( ) )
294+ XCTAssertEqual ( lines. count, 11 )
295+ // Unicode scalar semantics - \v matches all except for \r\n sequence
296+ lines = lineInput. matches (
297+ of: regex ( #"\d{2}\v(?=\d)"# ) . matchingSemantics ( . unicodeScalar) . anchorsMatchLineEndings ( ) )
298+ XCTAssertEqual ( lines. count, 10 )
299+
280300 // Does not contain an empty line
281301 XCTAssertFalse ( lineInput. contains ( regex ( #"^$"# ) ) )
282302 // Does contain an empty line (between \n and \r, which are reversed here)
0 commit comments