@@ -65,10 +65,14 @@ fileprivate extension Compiler.ByteCodeGen {
65
65
emitDot ( )
66
66
67
67
case let . char( c) :
68
- try emitCharacter ( c)
68
+ emitCharacter ( c)
69
69
70
70
case let . scalar( s) :
71
- try emitScalar ( s)
71
+ if options. semanticLevel == . graphemeCluster {
72
+ emitCharacter ( Character ( s) )
73
+ } else {
74
+ emitMatchScalar ( s)
75
+ }
72
76
73
77
case let . assertion( kind) :
74
78
try emitAssertion ( kind)
@@ -94,6 +98,34 @@ fileprivate extension Compiler.ByteCodeGen {
94
98
}
95
99
}
96
100
101
/// Emits the instructions that match the quoted literal `s`.
///
/// - In any semantic level other than grapheme-cluster, every Unicode
///   scalar of `s` is emitted through `emitMatchScalar`.
/// - In grapheme-cluster mode, when optimizations are enabled and `s` is a
///   non-empty all-ASCII literal, one scalar match is emitted per scalar and
///   only the final scalar requests a boundary check.
/// - Otherwise each character of `s` is emitted through `emitCharacter`.
///
/// An empty literal emits no instructions.
///
/// - Parameter s: The literal text to match.
mutating func emitQuotedLiteral(_ s: String) {
  guard options.semanticLevel == .graphemeCluster else {
    for char in s {
      for scalar in char.unicodeScalars {
        emitMatchScalar(scalar)
      }
    }
    return
  }

  // Fast path for eliding boundary checks for an all-ASCII quoted literal.
  // `allSatisfy` is vacuously true for an empty string, so the last index is
  // bound optionally: an empty literal has no last scalar and must fall
  // through instead of trapping on a force-unwrap.
  if optimizationsEnabled && s.allSatisfy(\.isASCII),
     let lastIdx = s.unicodeScalars.indices.last {
    for idx in s.unicodeScalars.indices {
      // Only the final scalar of the literal requests a boundary check.
      let boundaryCheck = idx == lastIdx
      let scalar = s.unicodeScalars[idx]
      if options.isCaseInsensitive && scalar.properties.isCased {
        builder.buildMatchScalarCaseInsensitive(scalar, boundaryCheck: boundaryCheck)
      } else {
        builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck)
      }
    }
    return
  }

  for c in s { emitCharacter(c) }
}
128
+
97
129
mutating func emitBackreference(
98
130
_ ref: AST . Reference
99
131
) throws {
@@ -257,41 +289,47 @@ fileprivate extension Compiler.ByteCodeGen {
257
289
}
258
290
}
259
291
260
- mutating func emitScalar( _ s: UnicodeScalar ) throws {
261
- // TODO: Native instruction buildMatchScalar(s)
262
- if options. isCaseInsensitive {
263
- // TODO: e.g. buildCaseInsensitiveMatchScalar(s)
264
- builder. buildConsume ( by: consumeScalar {
265
- $0. properties. lowercaseMapping == s. properties. lowercaseMapping
266
- } )
292
/// Emits a single-scalar match instruction for `s` with no boundary check.
///
/// Asserts that the current semantic level is `.unicodeScalar`. The
/// case-insensitive instruction is used only when case-insensitive matching
/// is enabled and `s` is a cased scalar.
///
/// - Parameter s: The Unicode scalar to match.
mutating func emitMatchScalar(_ s: UnicodeScalar) {
  assert(options.semanticLevel == .unicodeScalar)

  let ignoresCase = options.isCaseInsensitive && s.properties.isCased
  guard ignoresCase else {
    builder.buildMatchScalar(s, boundaryCheck: false)
    return
  }
  builder.buildMatchScalarCaseInsensitive(s, boundaryCheck: false)
}
273
300
274
- mutating func emitCharacter( _ c: Character ) throws {
275
- // Unicode scalar matches the specific scalars that comprise a character
301
/// Emits the instructions that match the character `c`.
///
/// In unicode-scalar mode the character is matched scalar by scalar through
/// `emitMatchScalar`. Otherwise the instruction is chosen from two
/// conditions: whether the match folds case (case-insensitive option set and
/// `c` is cased) and whether the ASCII scalar path applies (optimizations
/// enabled and `c` is ASCII).
///
/// - Parameter c: The character to match.
mutating func emitCharacter(_ c: Character) {
  // Unicode scalar mode matches the specific scalars that comprise a character
  guard options.semanticLevel != .unicodeScalar else {
    for scalar in c.unicodeScalars {
      emitMatchScalar(scalar)
    }
    return
  }

  let foldsCase = options.isCaseInsensitive && c.isCased
  let usesASCIIScalarPath = optimizationsEnabled && c.isASCII

  switch (foldsCase, usesASCIIScalarPath) {
  case (true, true):
    // c.isCased ensures that c is not CR-LF,
    // so we know that c is a single scalar
    assert(c.unicodeScalars.count == 1)
    builder.buildMatchScalarCaseInsensitive(
      c.unicodeScalars.last!,
      boundaryCheck: true)

  case (true, false):
    builder.buildMatch(c, isCaseInsensitive: true)

  case (false, true):
    // Match every scalar of the character; only the final one requests a
    // boundary check.
    let scalars = c.unicodeScalars
    let finalPosition = scalars.indices.last!
    for position in scalars.indices {
      builder.buildMatchScalar(
        scalars[position],
        boundaryCheck: position == finalPosition)
    }

  case (false, false):
    builder.buildMatch(c, isCaseInsensitive: false)
  }
}
296
334
297
335
mutating func emitAny( ) {
@@ -741,11 +779,12 @@ fileprivate extension Compiler.ByteCodeGen {
741
779
_ ccc: DSLTree . CustomCharacterClass
742
780
) throws {
743
781
if let asciiBitset = ccc. asAsciiBitset ( options) ,
744
- options. semanticLevel == . graphemeCluster,
745
782
optimizationsEnabled {
746
- // future work: add a bit to .matchBitset to consume either a character
747
- // or a scalar so we can have this optimization in scalar mode
748
- builder. buildMatchAsciiBitset ( asciiBitset)
783
+ if options. semanticLevel == . unicodeScalar {
784
+ builder. buildScalarMatchAsciiBitset ( asciiBitset)
785
+ } else {
786
+ builder. buildMatchAsciiBitset ( asciiBitset)
787
+ }
749
788
} else {
750
789
let consumer = try ccc. generateConsumer ( options)
751
790
builder. buildConsume ( by: consumer)
@@ -822,45 +861,7 @@ fileprivate extension Compiler.ByteCodeGen {
822
861
try emitAtom ( a)
823
862
824
863
case let . quotedLiteral( s) :
825
- if options. semanticLevel == . graphemeCluster {
826
- if options. isCaseInsensitive {
827
- // TODO: buildCaseInsensitiveMatchSequence(c) or alternative
828
- builder. buildConsume { input, bounds in
829
- var iterator = s. makeIterator ( )
830
- var currentIndex = bounds. lowerBound
831
- while let ch = iterator. next ( ) {
832
- guard currentIndex < bounds. upperBound,
833
- ch. lowercased ( ) == input [ currentIndex] . lowercased ( )
834
- else { return nil }
835
- input. formIndex ( after: & currentIndex)
836
- }
837
- return currentIndex
838
- }
839
- } else {
840
- builder. buildMatchSequence ( s)
841
- }
842
- } else {
843
- builder. buildConsume {
844
- [ caseInsensitive = options. isCaseInsensitive] input, bounds in
845
- // TODO: Case folding
846
- var iterator = s. unicodeScalars. makeIterator ( )
847
- var currentIndex = bounds. lowerBound
848
- while let scalar = iterator. next ( ) {
849
- guard currentIndex < bounds. upperBound else { return nil }
850
- if caseInsensitive {
851
- if scalar. properties. lowercaseMapping != input. unicodeScalars [ currentIndex] . properties. lowercaseMapping {
852
- return nil
853
- }
854
- } else {
855
- if scalar != input. unicodeScalars [ currentIndex] {
856
- return nil
857
- }
858
- }
859
- input. unicodeScalars. formIndex ( after: & currentIndex)
860
- }
861
- return currentIndex
862
- }
863
- }
864
+ emitQuotedLiteral ( s)
864
865
865
866
case let . convertedRegexLiteral( n, _) :
866
867
return try emitNode ( n)
0 commit comments