Skip to content

Commit 3a2b324

Browse files
committed
Remove matchseq entirely
1 parent 6f76f36 commit 3a2b324

File tree

1 file changed

+38
-15
lines changed

1 file changed

+38
-15
lines changed

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -219,17 +219,28 @@ fileprivate extension Compiler.ByteCodeGen {
219219
return
220220
}
221221

222-
if s.value < 0x300 {
223-
// lily todo: make sure this is correct + add compiler option check after it's merged in
224-
// we unconditionally match against the scalar using consumeScalar in the else case
225-
// so maybe this check is uneccessary??
222+
// if s.value < 0x300 {
223+
// // lily todo: make sure this is correct + add compiler option check after it's merged in
224+
//
225+
// // we unconditionally match against the scalar using consumeScalar in the else case
226+
// // so maybe this check is uneccessary??
227+
// // I thought having it be < 0x300 made sure we didn't have to worry about any combining stuff
228+
// // but in the else case we just unconditionally consume and check the value
229+
// // i think this is all redundant
230+
// builder.buildMatchScalar(s, boundaryCheck: false)
231+
// return
232+
// }
233+
//
234+
// builder.buildConsume(by: consumeScalar {
235+
// $0 == s
236+
// })
237+
if optimizationsEnabled {
226238
builder.buildMatchScalar(s, boundaryCheck: false)
227-
return
239+
} else {
240+
builder.buildConsume(by: consumeScalar {
241+
$0 == s
242+
})
228243
}
229-
230-
builder.buildConsume(by: consumeScalar {
231-
$0 == s
232-
})
233244
}
234245

235246
mutating func emitCharacter(_ c: Character) throws {
@@ -265,7 +276,7 @@ fileprivate extension Compiler.ByteCodeGen {
265276
// we can only match against characters that have a single cannonical equivalence
266277
// so I think that rules out any latin in here, so just use ascii for now
267278
// we also need to exclude our good non-single-scalar-ascii friend cr-lf
268-
if c.isASCII && c != "\r\n" {
279+
if optimizationsEnabled && c.isASCII && c != "\r\n" {
269280
builder.buildMatchScalar(c.unicodeScalars.first!, boundaryCheck: true)
270281
return
271282
}
@@ -782,12 +793,24 @@ fileprivate extension Compiler.ByteCodeGen {
782793
// lily todo: which strings are nfc invariant and matchable by direct scalar comparison?
783794
// alternatively: loop over characters in s and emit either matchScalar or matchCharacter depending on if it is NFC invariant
784795
// getting rid of matchSeq entirely does also get rid of the weird ARC
785-
if s.allSatisfy({c in c.isASCII && c != "\r\n"}) {
786-
for scalar in s.unicodeScalars.dropLast(1) {
787-
builder.buildMatchScalar(scalar, boundaryCheck: false)
796+
if optimizationsEnabled {
797+
for c in s {
798+
// Each character needs to be NFC invariant in order for us to match it directly by scalar value in grapheme cluster mode
799+
// lily temp: use isASCII for now, ask alex what exactly this check should be
800+
if c.isASCII && c != "\r\n" {
801+
builder.buildMatchScalar(c.unicodeScalars.first!, boundaryCheck: false)
802+
} else {
803+
// let's think about this carefully
804+
// what if our quoted literal is an ascii character + combining accent
805+
// what are the characters in the loop?
806+
807+
// I believe that if we ever have ascii + combining character in our input
808+
// string will automatically combine them into a unified character, so itll fall into this case
809+
810+
// so I don't think we ever need that boundaryCheck to be enabled, except at the end of this sequence
811+
builder.buildMatch(c)
812+
}
788813
}
789-
// check that we are on a boundary at the end and there isn't a combining character after this scalar
790-
builder.buildMatchScalar(s.unicodeScalars.last!, boundaryCheck: true)
791814
} else {
792815
builder.buildMatchSequence(s)
793816
}

0 commit comments

Comments
 (0)