Skip to content

[Optimization] Add instructions for consuming non-newlines and advancing in scalar view #596

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 3 additions & 14 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -217,27 +217,16 @@ fileprivate extension Compiler.ByteCodeGen {
case .graphemeCluster:
builder.buildAdvance(1)
case .unicodeScalar:
// TODO: builder.buildAdvanceUnicodeScalar(1)
builder.buildConsume { input, bounds in
input.unicodeScalars.index(after: bounds.lowerBound)
}
builder.buildAdvanceUnicodeScalar(1)
}
}

mutating func emitAnyNonNewline() {
switch options.semanticLevel {
case .graphemeCluster:
builder.buildConsume { input, bounds in
input[bounds.lowerBound].isNewline
? nil
: input.index(after: bounds.lowerBound)
}
builder.buildConsumeNonNewline()
case .unicodeScalar:
builder.buildConsume { input, bounds in
input[bounds.lowerBound].isNewline
? nil
: input.unicodeScalars.index(after: bounds.lowerBound)
}
builder.buildConsumeScalarNonNewline()
}
}

Expand Down
16 changes: 12 additions & 4 deletions Sources/_StringProcessing/Engine/InstPayload.swift
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,19 @@ extension Instruction.Payload {
interpret()
}

init(distance: Distance) {
self.init(distance)
init(distance: Distance, isScalarDistance: Bool = false) {
self.init(isScalarDistance ? 1 : 0, distance)
}
var distance: Distance {
interpret()
var distance: (isScalarDistance: Bool, Distance) {
let pair: (UInt64, Distance) = interpretPair()
return (isScalarDistance: pair.0 == 1, pair.1)
}

/// Creates a payload that records only whether the instruction operates on
/// unicode scalars.
///
/// The flag is encoded as `1` (scalar) or `0` (not scalar) and handed to
/// another initializer of this type.
///
/// - Parameter isScalar: `true` to encode scalar mode, `false` otherwise.
init(isScalar: Bool) {
  if isScalar {
    self.init(1)
  } else {
    self.init(0)
  }
}
/// Whether this payload encodes scalar mode.
///
/// `true` exactly when the payload's `rawValue` equals `1`, which is the value
/// `init(isScalar:)` passes along for `true`.
var isScalar: Bool {
  return rawValue == 1
}

init(bool: BoolRegister) {
Expand Down
4 changes: 4 additions & 0 deletions Sources/_StringProcessing/Engine/Instruction.swift
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ extension Instruction {
/// - If it is inverted
/// - If it strictly matches only ascii values
case matchBuiltin

/// Matches any non-newline character.
/// Operand: whether matching is performed in unicode-scalar mode.
case matchAnyNonNewline

// MARK: Extension points

Expand Down
13 changes: 13 additions & 0 deletions Sources/_StringProcessing/Engine/MEBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,19 @@ extension MEProgram.Builder {
/// Appends an `.advance` instruction whose payload carries the distance `n`.
///
/// The payload is built without the scalar flag, so it uses the default
/// (non-scalar) form of `init(distance:)`.
///
/// - Parameter n: The distance to advance by.
mutating func buildAdvance(_ n: Distance) {
  let payload = Instruction.Payload(distance: n)
  instructions.append(.init(.advance, payload))
}

/// Appends an `.advance` instruction whose payload is flagged as a
/// unicode-scalar distance.
///
/// - Parameter n: The distance to advance by, marked with
///   `isScalarDistance: true` in the payload.
mutating func buildAdvanceUnicodeScalar(_ n: Distance) {
  let payload = Instruction.Payload(distance: n, isScalarDistance: true)
  instructions.append(.init(.advance, payload))
}

/// Appends a `.matchAnyNonNewline` instruction with the scalar flag cleared
/// (`isScalar: false`).
mutating func buildConsumeNonNewline() {
  let payload = Instruction.Payload(isScalar: false)
  instructions.append(.init(.matchAnyNonNewline, payload))
}

/// Appends a `.matchAnyNonNewline` instruction with the scalar flag set
/// (`isScalar: true`).
mutating func buildConsumeScalarNonNewline() {
  let payload = Instruction.Payload(isScalar: true)
  instructions.append(.init(.matchAnyNonNewline, payload))
}

mutating func buildMatch(_ e: Character, isCaseInsensitive: Bool) {
instructions.append(.init(
Expand Down
54 changes: 51 additions & 3 deletions Sources/_StringProcessing/Engine/Processor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,18 @@ extension Processor {
currentPosition = idx
return true
}

/// Advances `currentPosition` by `n` positions in the input's unicode scalar
/// view.
///
/// The move is limited by `end`: if the offset index cannot be formed within
/// that limit, `signalFailure()` is called and the position is left unchanged.
///
/// - Parameter n: The number of unicode scalars to advance by.
/// - Returns: `true` if the position was advanced, `false` if failure was
///   signalled.
mutating func consumeScalar(_ n: Distance) -> Bool {
  if let target = input.unicodeScalars.index(
    currentPosition, offsetBy: n.rawValue, limitedBy: end
  ) {
    currentPosition = target
    return true
  }
  // Could not advance the requested distance without passing `end`.
  signalFailure()
  return false
}

/// Continue matching at the specified index.
///
Expand Down Expand Up @@ -321,6 +333,26 @@ extension Processor {
return true
}

/// Matches the next character if it is not a newline.
///
/// Signals failure when `load()` yields nothing or the loaded character is a
/// newline; otherwise consumes it via `_uncheckedForcedConsumeOne()`.
/// NOTE(review): presumably `load()` returns `nil` at the end of input —
/// confirm against its definition.
///
/// - Returns: `true` if a non-newline character was consumed, `false` if
///   failure was signalled.
mutating func matchAnyNonNewline() -> Bool {
  if let next = load(), next.isNewline == false {
    _uncheckedForcedConsumeOne()
    return true
  }
  signalFailure()
  return false
}

/// Matches the next unicode scalar if it is not a newline.
///
/// Signals failure when `loadScalar()` yields nothing or the loaded scalar is
/// a newline; otherwise moves `currentPosition` forward by one position in the
/// input's unicode scalar view.
/// NOTE(review): presumably `loadScalar()` returns `nil` at the end of input —
/// confirm against its definition.
///
/// - Returns: `true` if a non-newline scalar was consumed, `false` if failure
///   was signalled.
mutating func matchAnyNonNewlineScalar() -> Bool {
  if let scalar = loadScalar(), scalar.isNewline == false {
    input.unicodeScalars.formIndex(after: &currentPosition)
    return true
  }
  signalFailure()
  return false
}

mutating func signalFailure() {
guard !savePoints.isEmpty else {
state = .fail
Expand Down Expand Up @@ -469,10 +501,26 @@ extension Processor {
signalFailure()

case .advance:
if consume(payload.distance) {
controller.step()
let (isScalar, distance) = payload.distance
if isScalar {
if consumeScalar(distance) {
controller.step()
}
} else {
if consume(distance) {
controller.step()
}
}
case .matchAnyNonNewline:
if payload.isScalar {
if matchAnyNonNewlineScalar() {
controller.step()
}
} else {
if matchAnyNonNewline() {
controller.step()
}
}

case .match:
let (isCaseInsensitive, reg) = payload.elementPayload
if isCaseInsensitive {
Expand Down
5 changes: 4 additions & 1 deletion Tests/RegexTests/CompileTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ enum DecodedInstr {
case matchScalarCaseInsensitive
case matchScalarUnchecked
case matchBitsetScalar
case matchAnyNonNewline
case matchBitset
case matchBuiltin
case consumeBy
Expand Down Expand Up @@ -116,7 +117,9 @@ extension DecodedInstr {
return .matchBitset
}
case .consumeBy:
return consumeBy
return .consumeBy
case .matchAnyNonNewline:
return .matchAnyNonNewline
case .assertBy:
return .assertBy
case .matchBy:
Expand Down