@@ -168,7 +168,15 @@ extension Compiler.ByteCodeGen {
168
168
}
169
169
170
170
mutating func emitCharacter( _ c: Character ) throws {
171
- // FIXME: Does semantic level matter?
171
+ // At the Unicode scalar semantic level, match each scalar that makes up the character, in order
172
+ if options. semanticLevel == . unicodeScalar {
173
+ print ( " emitting ' \( c) ' as a sequence of \( c. unicodeScalars. count) scalars " )
174
+ for scalar in c. unicodeScalars {
175
+ try emitScalar ( scalar)
176
+ }
177
+ return
178
+ }
179
+
172
180
if options. isCaseInsensitive && c. isCased {
173
181
// TODO: buildCaseInsensitiveMatch(c) or buildMatch(c, caseInsensitive: true)
174
182
builder. buildConsume { input, bounds in
@@ -625,22 +633,44 @@ extension Compiler.ByteCodeGen {
625
633
try emitAtom ( a)
626
634
627
635
case let . quotedLiteral( s) :
628
- // TODO: Should this incorporate options?
629
- if options. isCaseInsensitive {
630
- // TODO: buildCaseInsensitiveMatchSequence(c) or alternative
631
- builder. buildConsume { input, bounds in
632
- var iterator = s. makeIterator ( )
636
+ if options. semanticLevel == . graphemeCluster {
637
+ if options. isCaseInsensitive {
638
+ // TODO: buildCaseInsensitiveMatchSequence(c) or alternative
639
+ builder. buildConsume { input, bounds in
640
+ var iterator = s. makeIterator ( )
641
+ var currentIndex = bounds. lowerBound
642
+ while let ch = iterator. next ( ) {
643
+ guard currentIndex < bounds. upperBound,
644
+ ch. lowercased ( ) == input [ currentIndex] . lowercased ( )
645
+ else { return nil }
646
+ input. formIndex ( after: & currentIndex)
647
+ }
648
+ return currentIndex
649
+ }
650
+ } else {
651
+ builder. buildMatchSequence ( s)
652
+ }
653
+ } else {
654
+ builder. buildConsume {
655
+ [ caseInsensitive = options. isCaseInsensitive] input, bounds in
656
+ // TODO: Compare using Unicode case folding; lowercase mappings only approximate case-insensitive equality
657
+ var iterator = s. unicodeScalars. makeIterator ( )
633
658
var currentIndex = bounds. lowerBound
634
- while let ch = iterator. next ( ) {
635
- guard currentIndex < bounds. upperBound,
636
- ch. lowercased ( ) == input [ currentIndex] . lowercased ( )
637
- else { return nil }
638
- input. formIndex ( after: & currentIndex)
659
+ while let scalar = iterator. next ( ) {
660
+ guard currentIndex < bounds. upperBound else { return nil }
661
+ if caseInsensitive {
662
+ if scalar. properties. lowercaseMapping != input. unicodeScalars [ currentIndex] . properties. lowercaseMapping {
663
+ return nil
664
+ }
665
+ } else {
666
+ if scalar != input. unicodeScalars [ currentIndex] {
667
+ return nil
668
+ }
669
+ }
670
+ input. unicodeScalars. formIndex ( after: & currentIndex)
639
671
}
640
672
return currentIndex
641
673
}
642
- } else {
643
- builder. buildMatchSequence ( s)
644
674
}
645
675
646
676
case let . regexLiteral( l) :
0 commit comments