@@ -361,38 +361,60 @@ extension DSLTree.CustomCharacterClass.Member {
361361 }
362362 return c
363363 case let . range( low, high) :
364- // TODO:
365- guard let lhs = low. literalCharacterValue else {
364+ guard let lhs = low. literalCharacterValue? . singleScalar, lhs. isNFC else {
366365 throw Unsupported ( " \( low) in range " )
367366 }
368- guard let rhs = high. literalCharacterValue else {
367+ guard let rhs = high. literalCharacterValue? . singleScalar , rhs . isNFC else {
369368 throw Unsupported ( " \( high) in range " )
370369 }
370+ guard lhs <= rhs else {
371+ throw Unsupported ( " Invalid range \( low) - \( high) " )
372+ }
371373
372- if opts. isCaseInsensitive {
373- let lhsLower = lhs. lowercased ( )
374- let rhsLower = rhs. lowercased ( )
375- guard lhsLower <= rhsLower else { throw Unsupported ( " Invalid range \( lhs) - \( rhs) " ) }
376- return { input, bounds in
377- // TODO: check for out of bounds?
378- let curIdx = bounds. lowerBound
379- if ( lhsLower... rhsLower) . contains ( input [ curIdx] . lowercased ( ) ) {
380- // TODO: semantic level
381- return input. index ( after: curIdx)
382- }
383- return nil
374+ let isCaseInsensitive = opts. isCaseInsensitive
375+ let isCharacterSemantic = opts. semanticLevel == . graphemeCluster
376+
377+ return { input, bounds in
378+ let curIdx = bounds. lowerBound
379+ let nextIndex = isCharacterSemantic
380+ ? input. index ( after: curIdx)
381+ : input. unicodeScalars. index ( after: curIdx)
382+
383+ // Under grapheme semantics, we compare based on single NFC scalars. If
384+ // such a character is not single scalar under NFC, the match fails. In
385+ // scalar semantics, we compare the exact scalar value to the NFC
386+ // bounds.
387+ let scalar = isCharacterSemantic ? input [ curIdx] . singleNFCScalar
388+ : input. unicodeScalars [ curIdx]
389+ guard let scalar = scalar else { return nil }
390+ let scalarRange = lhs ... rhs
391+ if scalarRange. contains ( scalar) {
392+ return nextIndex
384393 }
385- } else {
386- guard lhs <= rhs else { throw Unsupported ( " Invalid range \( lhs) - \( rhs) " ) }
387- return { input, bounds in
388- // TODO: check for out of bounds?
389- let curIdx = bounds. lowerBound
390- if ( lhs... rhs) . contains ( input [ curIdx] ) {
391- // TODO: semantic level
392- return input. index ( after: curIdx)
394+
395+ // Check for case insensitive matches.
396+ func matchesCased(
397+ _ cased: ( UnicodeScalar . Properties ) -> String
398+ ) -> Bool {
399+ let casedStr = cased ( scalar. properties)
400+ // In character semantic mode, we need to map to NFC. In scalar
401+ // semantics, we should have an exact scalar.
402+ let mapped = isCharacterSemantic ? casedStr. singleNFCScalar
403+ : casedStr. singleScalar
404+ guard let mapped = mapped else { return false }
405+ return scalarRange. contains ( mapped)
406+ }
407+ if isCaseInsensitive {
408+ if scalar. properties. changesWhenLowercased,
409+ matchesCased ( \. lowercaseMapping) {
410+ return nextIndex
411+ }
412+ if scalar. properties. changesWhenUppercased,
413+ matchesCased ( \. uppercaseMapping) {
414+ return nextIndex
393415 }
394- return nil
395416 }
417+ return nil
396418 }
397419
398420 case let . custom( ccc) :
0 commit comments