Skip to content

Commit ceecaaa

Browse files
authored
Merge pull request #596 from rctcwyvrn/consume-non-newline
[Optimization] Add instructions for consuming non-newlines and advancing in scalar view
2 parents ec7727b + b7b23d3 commit ceecaaa

File tree

6 files changed

+87
-22
lines changed

6 files changed

+87
-22
lines changed

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -220,27 +220,16 @@ fileprivate extension Compiler.ByteCodeGen {
220220
case .graphemeCluster:
221221
builder.buildAdvance(1)
222222
case .unicodeScalar:
223-
// TODO: builder.buildAdvanceUnicodeScalar(1)
224-
builder.buildConsume { input, bounds in
225-
input.unicodeScalars.index(after: bounds.lowerBound)
226-
}
223+
builder.buildAdvanceUnicodeScalar(1)
227224
}
228225
}
229226

230227
mutating func emitAnyNonNewline() {
231228
switch options.semanticLevel {
232229
case .graphemeCluster:
233-
builder.buildConsume { input, bounds in
234-
input[bounds.lowerBound].isNewline
235-
? nil
236-
: input.index(after: bounds.lowerBound)
237-
}
230+
builder.buildConsumeNonNewline()
238231
case .unicodeScalar:
239-
builder.buildConsume { input, bounds in
240-
input[bounds.lowerBound].isNewline
241-
? nil
242-
: input.unicodeScalars.index(after: bounds.lowerBound)
243-
}
232+
builder.buildConsumeScalarNonNewline()
244233
}
245234
}
246235

Sources/_StringProcessing/Engine/InstPayload.swift

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -196,11 +196,19 @@ extension Instruction.Payload {
196196
interpret()
197197
}
198198

199-
init(distance: Distance) {
200-
self.init(distance)
199+
init(distance: Distance, isScalarDistance: Bool = false) {
200+
self.init(isScalarDistance ? 1 : 0, distance)
201201
}
202-
var distance: Distance {
203-
interpret()
202+
var distance: (isScalarDistance: Bool, Distance) {
203+
let pair: (UInt64, Distance) = interpretPair()
204+
return (isScalarDistance: pair.0 == 1, pair.1)
205+
}
206+
207+
init(isScalar: Bool) {
208+
self.init(isScalar ? 1 : 0)
209+
}
210+
var isScalar: Bool {
211+
self.rawValue == 1
204212
}
205213

206214
init(bool: BoolRegister) {

Sources/_StringProcessing/Engine/Instruction.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,10 @@ extension Instruction {
122122
/// - If it is inverted
123123
/// - If it strictly matches only ascii values
124124
case matchBuiltin
125+
126+
/// Matches any single character (or Unicode scalar, in scalar mode) that is not a newline
127+
/// Operand: Whether to match in Unicode scalar mode rather than by character
128+
case matchAnyNonNewline
125129

126130
// MARK: Extension points
127131

Sources/_StringProcessing/Engine/MEBuilder.swift

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,19 @@ extension MEProgram.Builder {
142142
mutating func buildAdvance(_ n: Distance) {
143143
instructions.append(.init(.advance, .init(distance: n)))
144144
}
145+
146+
mutating func buildAdvanceUnicodeScalar(_ n: Distance) {
147+
instructions.append(
148+
.init(.advance, .init(distance: n, isScalarDistance: true)))
149+
}
150+
151+
mutating func buildConsumeNonNewline() {
152+
instructions.append(.init(.matchAnyNonNewline, .init(isScalar: false)))
153+
}
154+
155+
mutating func buildConsumeScalarNonNewline() {
156+
instructions.append(.init(.matchAnyNonNewline, .init(isScalar: true)))
157+
}
145158

146159
mutating func buildMatch(_ e: Character, isCaseInsensitive: Bool) {
147160
instructions.append(.init(

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,18 @@ extension Processor {
181181
currentPosition = idx
182182
return true
183183
}
184+
185+
// Advances `n` positions in the Unicode scalar view, signaling failure if that would move past `end`
186+
mutating func consumeScalar(_ n: Distance) -> Bool {
187+
guard let idx = input.unicodeScalars.index(
188+
currentPosition, offsetBy: n.rawValue, limitedBy: end
189+
) else {
190+
signalFailure()
191+
return false
192+
}
193+
currentPosition = idx
194+
return true
195+
}
184196

185197
/// Continue matching at the specified index.
186198
///
@@ -329,6 +341,26 @@ extension Processor {
329341
return true
330342
}
331343

344+
// Matches the next character if it is not a newline
345+
mutating func matchAnyNonNewline() -> Bool {
346+
guard let c = load(), !c.isNewline else {
347+
signalFailure()
348+
return false
349+
}
350+
_uncheckedForcedConsumeOne()
351+
return true
352+
}
353+
354+
// Matches the next scalar if it is not a newline
355+
mutating func matchAnyNonNewlineScalar() -> Bool {
356+
guard let s = loadScalar(), !s.isNewline else {
357+
signalFailure()
358+
return false
359+
}
360+
input.unicodeScalars.formIndex(after: &currentPosition)
361+
return true
362+
}
363+
332364
mutating func signalFailure() {
333365
guard !savePoints.isEmpty else {
334366
state = .fail
@@ -477,10 +509,26 @@ extension Processor {
477509
signalFailure()
478510

479511
case .advance:
480-
if consume(payload.distance) {
481-
controller.step()
512+
let (isScalar, distance) = payload.distance
513+
if isScalar {
514+
if consumeScalar(distance) {
515+
controller.step()
516+
}
517+
} else {
518+
if consume(distance) {
519+
controller.step()
520+
}
521+
}
522+
case .matchAnyNonNewline:
523+
if payload.isScalar {
524+
if matchAnyNonNewlineScalar() {
525+
controller.step()
526+
}
527+
} else {
528+
if matchAnyNonNewline() {
529+
controller.step()
530+
}
482531
}
483-
484532
case .match:
485533
let (isCaseInsensitive, reg) = payload.elementPayload
486534
if isCaseInsensitive {

Tests/RegexTests/CompileTests.swift

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ enum DecodedInstr {
3737
case matchScalarCaseInsensitive
3838
case matchScalarUnchecked
3939
case matchBitsetScalar
40+
case matchAnyNonNewline
4041
case matchBitset
4142
case matchBuiltin
4243
case consumeBy
@@ -116,7 +117,9 @@ extension DecodedInstr {
116117
return .matchBitset
117118
}
118119
case .consumeBy:
119-
return consumeBy
120+
return .consumeBy
121+
case .matchAnyNonNewline:
122+
return .matchAnyNonNewline
120123
case .assertBy:
121124
return .assertBy
122125
case .matchBy:

0 commit comments

Comments
 (0)