@@ -219,17 +219,28 @@ fileprivate extension Compiler.ByteCodeGen {
219
219
return
220
220
}
221
221
222
- if s. value < 0x300 {
223
- // lily todo: make sure this is correct + add compiler option check after it's merged in
224
- // we unconditionally match against the scalar using consumeScalar in the else case
225
- // so maybe this check is uneccessary??
222
+ // if s.value < 0x300 {
223
+ // // lily todo: make sure this is correct + add compiler option check after it's merged in
224
+ //
225
+ // // we unconditionally match against the scalar using consumeScalar in the else case
226
+ // // so maybe this check is uneccessary??
227
+ // // I thought having it be < 0x300 made sure we didn't have to worry about any combining stuff
228
+ // // but in the else case we just unconditionally consume and check the value
229
+ // // i think this is all redundant
230
+ // builder.buildMatchScalar(s, boundaryCheck: false)
231
+ // return
232
+ // }
233
+ //
234
+ // builder.buildConsume(by: consumeScalar {
235
+ // $0 == s
236
+ // })
237
+ if optimizationsEnabled {
226
238
builder. buildMatchScalar ( s, boundaryCheck: false )
227
- return
239
+ } else {
240
+ builder. buildConsume ( by: consumeScalar {
241
+ $0 == s
242
+ } )
228
243
}
229
-
230
- builder. buildConsume ( by: consumeScalar {
231
- $0 == s
232
- } )
233
244
}
234
245
235
246
mutating func emitCharacter( _ c: Character ) throws {
@@ -265,7 +276,7 @@ fileprivate extension Compiler.ByteCodeGen {
265
276
// we can only match against characters that have a single cannonical equivalence
266
277
// so I think that rules out any latin in here, so just use ascii for now
267
278
// we also need to exclude our good non-single-scalar-ascii friend cr-lf
268
- if c. isASCII && c != " \r \n " {
279
+ if optimizationsEnabled && c. isASCII && c != " \r \n " {
269
280
builder. buildMatchScalar ( c. unicodeScalars. first!, boundaryCheck: true )
270
281
return
271
282
}
@@ -782,12 +793,24 @@ fileprivate extension Compiler.ByteCodeGen {
782
793
// lily todo: which strings are nfc invariant and matchable by direct scalar comparison?
783
794
// alternatively: loop over characters in s and emit either matchScalar or matchCharacter depending on if it is NFC invariant
784
795
// getting rid of matchSeq entirely does also get rid of the weird ARC
785
- if s. allSatisfy ( { c in c. isASCII && c != " \r \n " } ) {
786
- for scalar in s. unicodeScalars. dropLast ( 1 ) {
787
- builder. buildMatchScalar ( scalar, boundaryCheck: false )
796
+ if optimizationsEnabled {
797
+ for c in s {
798
+ // Each character needs to be NFC invariant in order for us to match it directly by scalar value in grapheme cluster mode
799
+ // lily temp: use isASCII for now, ask alex what exactly this check should be
800
+ if c. isASCII && c != " \r \n " {
801
+ builder. buildMatchScalar ( c. unicodeScalars. first!, boundaryCheck: false )
802
+ } else {
803
+ // let's think about this carefully
804
+ // what if our quoted literal is an ascii character + combining accent
805
+ // what are the characters in the loop?
806
+
807
+ // I believe that if we ever have ascii + combining character in our input
808
+ // string will automatically combine them into a unified character, so itll fall into this case
809
+
810
+ // so I don't think we ever need that boundaryCheck to be enabled, except at the end of this sequence
811
+ builder. buildMatch ( c)
812
+ }
788
813
}
789
- // check that we are on a boundary at the end and there isn't a combining character after this scalar
790
- builder. buildMatchScalar ( s. unicodeScalars. last!, boundaryCheck: true )
791
814
} else {
792
815
builder. buildMatchSequence ( s)
793
816
}
0 commit comments