-
Notifications
You must be signed in to change notification settings - Fork 49
Disentangle disparate 'bounds' ideas in processor #496
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
79e4c65
8b585af
a6ff9a5
a87bc1b
d1290f8
69dae18
80ccef6
5f56e39
46173f2
6dcb9fd
45c5bd2
e80fe54
7c125bc
7bf5b14
6ce6108
73ba7c7
0da43e5
c1a6edf
8d8571f
94612b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,18 +31,36 @@ struct Processor< | |
> where Input.Element: Equatable { // maybe Hashable? | ||
typealias Element = Input.Element | ||
|
||
/// The base collection of the subject to search. | ||
/// | ||
/// Taken together, `input` and `subjectBounds` define the actual subject | ||
/// of the search. `input` can be a "supersequence" of the subject, while | ||
/// `input[subjectBounds]` is the logical entity that is being searched. | ||
let input: Input | ||
|
||
/// The bounds of the logical subject in `input`. | ||
/// | ||
/// | ||
/// `subjectBounds` is equal to or a subrange of | ||
/// `input.startIndex..<input.endIndex`. | ||
let subjectBounds: Range<Position> | ||
|
||
let matchMode: MatchMode | ||
let instructions: InstructionList<Instruction> | ||
|
||
// MARK: Resettable state | ||
|
||
// The subject bounds. | ||
// | ||
// FIXME: This also conflates search bounds too! | ||
var bounds: Range<Position> | ||
|
||
// The current position in the subject | ||
/// The bounds within the subject for an individual search. | ||
/// | ||
/// `searchBounds` is equal to `subjectBounds` in some cases, but can be a | ||
/// subrange when performing operations like searching for matches iteratively | ||
/// or calling `str.replacing(_:with:subrange:)`. | ||
natecook1000 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
var searchBounds: Range<Position> | ||
|
||
/// The current search position while processing. | ||
/// | ||
/// `currentPosition` must always be in the range `subjectBounds` or equal | ||
/// to `subjectBounds.upperBound`. | ||
natecook1000 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
var currentPosition: Position | ||
|
||
var controller: Controller | ||
|
@@ -59,53 +77,51 @@ struct Processor< | |
|
||
var failureReason: Error? = nil | ||
|
||
|
||
// MARK: Metrics, debugging, etc. | ||
var cycleCount = 0 | ||
var isTracingEnabled: Bool | ||
|
||
} | ||
|
||
extension Processor { | ||
typealias Position = Input.Index | ||
|
||
var start: Position { bounds.lowerBound } | ||
var end: Position { bounds.upperBound } | ||
var start: Position { subjectBounds.lowerBound } | ||
var end: Position { subjectBounds.upperBound } | ||
} | ||
|
||
extension Processor { | ||
init( | ||
program: MEProgram<Input>, | ||
input: Input, | ||
bounds: Range<Position>, | ||
subjectBounds: Range<Position>, | ||
searchBounds: Range<Position>, | ||
matchMode: MatchMode, | ||
isTracingEnabled: Bool | ||
) { | ||
self.controller = Controller(pc: 0) | ||
self.instructions = program.instructions | ||
self.input = input | ||
self.bounds = bounds | ||
self.subjectBounds = subjectBounds | ||
self.searchBounds = searchBounds | ||
self.matchMode = matchMode | ||
self.isTracingEnabled = isTracingEnabled | ||
self.currentPosition = bounds.lowerBound | ||
self.currentPosition = searchBounds.lowerBound | ||
|
||
self.registers = Registers(program, bounds.upperBound) | ||
// Initialize registers with end of search bounds | ||
self.registers = Registers(program, searchBounds.upperBound) | ||
self.storedCaptures = Array( | ||
repeating: .init(), count: program.registerInfo.captures) | ||
|
||
_checkInvariants() | ||
} | ||
|
||
|
||
mutating func reset(searchBounds: Range<Position>) { | ||
// FIXME: We currently conflate both subject bounds and search bounds | ||
// This should just reset search bounds | ||
self.bounds = searchBounds | ||
self.currentPosition = self.bounds.lowerBound | ||
self.searchBounds = searchBounds | ||
self.currentPosition = self.searchBounds.lowerBound | ||
|
||
self.controller = Controller(pc: 0) | ||
|
||
self.registers.reset(sentinel: bounds.upperBound) | ||
self.registers.reset(sentinel: searchBounds.upperBound) | ||
|
||
self.savePoints.removeAll(keepingCapacity: true) | ||
self.callStack.removeAll(keepingCapacity: true) | ||
|
@@ -132,7 +148,7 @@ extension Processor { | |
var slice: Input.SubSequence { | ||
// TODO: Should we whole-scale switch to slices, or | ||
// does that depend on options for some anchors? | ||
input[bounds] | ||
input[subjectBounds] | ||
} | ||
|
||
// Advance in our input, without any checks or failure signalling | ||
|
@@ -161,8 +177,8 @@ extension Processor { | |
/// - Precondition: `subjectBounds.contains(index) || index == subjectBounds.upperBound` | ||
/// - Precondition: `index >= currentPosition` | ||
mutating func resume(at index: Input.Index) { | ||
assert(index >= bounds.lowerBound) | ||
assert(index <= bounds.upperBound) | ||
assert(index >= subjectBounds.lowerBound) | ||
assert(index <= subjectBounds.upperBound) | ||
natecook1000 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
assert(index >= currentPosition) | ||
currentPosition = index | ||
} | ||
|
@@ -233,7 +249,7 @@ extension Processor { | |
switch (currentPosition, matchMode) { | ||
// When reaching the end of the match bounds or when we are only doing a | ||
// prefix match, transition to accept. | ||
case (bounds.upperBound, _), (_, .partialFromFront): | ||
case (subjectBounds.upperBound, _), (_, .partialFromFront): | ||
state = .accept | ||
|
||
// When we are doing a full match but did not reach the end of the match | ||
|
@@ -411,9 +427,9 @@ extension Processor { | |
|
||
case .consumeBy: | ||
let reg = payload.consumer | ||
guard currentPosition < bounds.upperBound, | ||
guard currentPosition < subjectBounds.upperBound, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Subject or search bounds? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How do we, generally, know which one to choose? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Generally, we want to be moving and matching with `searchBounds`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (i.e. this was incorrect) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Any way we could use access control or some better coding convention here? E.g. if we always want search bounds except for anchors, can we isolate the code that refers to subject bounds inside the engine? |
||
let nextIndex = registers[reg]( | ||
input, currentPosition..<bounds.upperBound) | ||
input, currentPosition..<subjectBounds.upperBound) | ||
else { | ||
signalFailure() | ||
return | ||
|
@@ -425,7 +441,7 @@ extension Processor { | |
let reg = payload.assertion | ||
let assertion = registers[reg] | ||
do { | ||
guard try assertion(input, currentPosition, bounds) else { | ||
guard try assertion(input, currentPosition, subjectBounds) else { | ||
signalFailure() | ||
return | ||
} | ||
|
@@ -440,7 +456,7 @@ extension Processor { | |
let matcher = registers[matcherReg] | ||
do { | ||
guard let (nextIdx, val) = try matcher( | ||
input, currentPosition, bounds | ||
input, currentPosition, subjectBounds | ||
) else { | ||
signalFailure() | ||
return | ||
|
Uh oh!
There was an error while loading. Please reload this page.