@@ -168,7 +168,15 @@ extension Compiler.ByteCodeGen {
168168 }
169169
170170 mutating func emitCharacter( _ c: Character ) throws {
171- // FIXME: Does semantic level matter?
171+ // Unicode scalar matches the specific scalars that comprise a character
172+ if options. semanticLevel == . unicodeScalar {
173+ print ( " emitting ' \( c) ' as a sequence of \( c. unicodeScalars. count) scalars " )
174+ for scalar in c. unicodeScalars {
175+ try emitScalar ( scalar)
176+ }
177+ return
178+ }
179+
172180 if options. isCaseInsensitive && c. isCased {
173181 // TODO: buildCaseInsensitiveMatch(c) or buildMatch(c, caseInsensitive: true)
174182 builder. buildConsume { input, bounds in
@@ -625,22 +633,44 @@ extension Compiler.ByteCodeGen {
625633 try emitAtom ( a)
626634
627635 case let . quotedLiteral( s) :
628- // TODO: Should this incorporate options?
629- if options. isCaseInsensitive {
630- // TODO: buildCaseInsensitiveMatchSequence(c) or alternative
631- builder. buildConsume { input, bounds in
632- var iterator = s. makeIterator ( )
636+ if options. semanticLevel == . graphemeCluster {
637+ if options. isCaseInsensitive {
638+ // TODO: buildCaseInsensitiveMatchSequence(c) or alternative
639+ builder. buildConsume { input, bounds in
640+ var iterator = s. makeIterator ( )
641+ var currentIndex = bounds. lowerBound
642+ while let ch = iterator. next ( ) {
643+ guard currentIndex < bounds. upperBound,
644+ ch. lowercased ( ) == input [ currentIndex] . lowercased ( )
645+ else { return nil }
646+ input. formIndex ( after: & currentIndex)
647+ }
648+ return currentIndex
649+ }
650+ } else {
651+ builder. buildMatchSequence ( s)
652+ }
653+ } else {
654+ builder. buildConsume {
655+ [ caseInsensitive = options. isCaseInsensitive] input, bounds in
656+ // TODO: Case folding
657+ var iterator = s. unicodeScalars. makeIterator ( )
633658 var currentIndex = bounds. lowerBound
634- while let ch = iterator. next ( ) {
635- guard currentIndex < bounds. upperBound,
636- ch. lowercased ( ) == input [ currentIndex] . lowercased ( )
637- else { return nil }
638- input. formIndex ( after: & currentIndex)
659+ while let scalar = iterator. next ( ) {
660+ guard currentIndex < bounds. upperBound else { return nil }
661+ if caseInsensitive {
662+ if scalar. properties. lowercaseMapping != input. unicodeScalars [ currentIndex] . properties. lowercaseMapping {
663+ return nil
664+ }
665+ } else {
666+ if scalar != input. unicodeScalars [ currentIndex] {
667+ return nil
668+ }
669+ }
670+ input. unicodeScalars. formIndex ( after: & currentIndex)
639671 }
640672 return currentIndex
641673 }
642- } else {
643- builder. buildMatchSequence ( s)
644674 }
645675
646676 case let . regexLiteral( l) :
0 commit comments