Skip to content

Commit 94f5d33

Browse files
authored
De-genericize processor, engine, etc. (#502)
* Avoid double execution by avoiding Array init
* De-genericize processor, engine, etc.

Provides only modest performance improvements (it was already getting specialized), but makes it possible to add String-specific specializations.
1 parent: 4cea05a · commit: 94f5d33

15 files changed

+66
-59
lines changed

Sources/_StringProcessing/Algorithms/Matching/Matches.swift

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -349,6 +349,12 @@ extension BidirectionalCollection where SubSequence == Substring {
349349
public func matches<Output>(
350350
of r: some RegexComponent<Output>
351351
) -> [Regex<Output>.Match] {
352-
Array(_matches(of: r))
352+
// FIXME: Array init calls count, which double-executes the regex :-(
353+
// FIXME: just return some Collection<Regex<Output>.Match>
354+
var result = Array<Regex<Output>.Match>()
355+
for match in _matches(of: r) {
356+
result.append(match)
357+
}
358+
return result
353359
}
354360
}

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
extension Compiler {
44
struct ByteCodeGen {
55
var options: MatchingOptions
6-
var builder = Program.Builder()
6+
var builder = MEProgram.Builder()
77
/// A Boolean indicating whether the first matchable atom has been emitted.
88
/// This is used to determine whether to apply initial options.
99
var hasEmittedFirstMatchableAtom = false
@@ -16,7 +16,7 @@ extension Compiler {
1616
}
1717

1818
extension Compiler.ByteCodeGen {
19-
mutating func emitRoot(_ root: DSLTree.Node) throws -> Program {
19+
mutating func emitRoot(_ root: DSLTree.Node) throws -> MEProgram {
2020
// The whole match (`.0` element of output) is equivalent to an implicit
2121
// capture over the entire regex.
2222
try emitNode(.capture(name: nil, reference: nil, root))

Sources/_StringProcessing/Compiler.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ class Compiler {
2525
self.tree = tree
2626
}
2727

28-
__consuming func emit() throws -> Program {
28+
__consuming func emit() throws -> MEProgram {
2929
// TODO: Handle global options
3030
var codegen = ByteCodeGen(
3131
options: options, captureList: tree.captureList

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ extension DSLTree.Node {
1818
/// the front of an input range
1919
func generateConsumer(
2020
_ opts: MatchingOptions
21-
) throws -> MEProgram<String>.ConsumeFunction? {
21+
) throws -> MEProgram.ConsumeFunction? {
2222
switch self {
2323
case .atom(let a):
2424
return try a.generateConsumer(opts)
@@ -56,7 +56,7 @@ extension DSLTree.Atom {
5656
// top-level nodes, but it's also invoked for `.atom` members of a custom CC
5757
func generateConsumer(
5858
_ opts: MatchingOptions
59-
) throws -> MEProgram<String>.ConsumeFunction? {
59+
) throws -> MEProgram.ConsumeFunction? {
6060
let isCaseInsensitive = opts.isCaseInsensitive
6161

6262
switch self {
@@ -142,7 +142,7 @@ extension String {
142142
}
143143
}
144144

145-
func consumeName(_ name: String, opts: MatchingOptions) -> MEProgram<String>.ConsumeFunction {
145+
func consumeName(_ name: String, opts: MatchingOptions) -> MEProgram.ConsumeFunction {
146146
let consume = consumeFunction(for: opts)
147147
return consume(propertyScalarPredicate {
148148
// FIXME: name aliases not covered by $0.nameAlias are missed
@@ -180,7 +180,7 @@ extension AST.Atom {
180180

181181
func generateConsumer(
182182
_ opts: MatchingOptions
183-
) throws -> MEProgram<String>.ConsumeFunction? {
183+
) throws -> MEProgram.ConsumeFunction? {
184184
// TODO: Wean ourselves off of this type...
185185
if let cc = self.characterClass?.withMatchLevel(
186186
opts.matchLevel
@@ -237,7 +237,7 @@ extension AST.Atom {
237237
extension DSLTree.CustomCharacterClass.Member {
238238
func generateConsumer(
239239
_ opts: MatchingOptions
240-
) throws -> MEProgram<String>.ConsumeFunction {
240+
) throws -> MEProgram.ConsumeFunction {
241241
switch self {
242242
case let .atom(a):
243243
guard let c = try a.generateConsumer(opts) else {
@@ -344,7 +344,7 @@ extension DSLTree.CustomCharacterClass.Member {
344344
extension DSLTree.CustomCharacterClass {
345345
func generateConsumer(
346346
_ opts: MatchingOptions
347-
) throws -> MEProgram<String>.ConsumeFunction {
347+
) throws -> MEProgram.ConsumeFunction {
348348
// NOTE: Easy way to implement, obviously not performant
349349
let consumers = try members.map {
350350
try $0.generateConsumer(opts)
@@ -386,7 +386,7 @@ private func propertyScalarPredicate(_ p: @escaping (Unicode.Scalar.Properties)
386386

387387
func consumeScalar(
388388
_ p: @escaping ScalarPredicate
389-
) -> MEProgram<String>.ConsumeFunction {
389+
) -> MEProgram.ConsumeFunction {
390390
{ input, bounds in
391391
// TODO: bounds check?
392392
let curIdx = bounds.lowerBound
@@ -399,7 +399,7 @@ func consumeScalar(
399399
}
400400
func consumeCharacterWithLeadingScalar(
401401
_ p: @escaping ScalarPredicate
402-
) -> MEProgram<String>.ConsumeFunction {
402+
) -> MEProgram.ConsumeFunction {
403403
{ input, bounds in
404404
let curIdx = bounds.lowerBound
405405
if p(input[curIdx].unicodeScalars.first!) {
@@ -410,7 +410,7 @@ func consumeCharacterWithLeadingScalar(
410410
}
411411
func consumeCharacterWithSingleScalar(
412412
_ p: @escaping ScalarPredicate
413-
) -> MEProgram<String>.ConsumeFunction {
413+
) -> MEProgram.ConsumeFunction {
414414
{ input, bounds in
415415
let curIdx = bounds.lowerBound
416416

@@ -423,7 +423,7 @@ func consumeCharacterWithSingleScalar(
423423

424424
func consumeFunction(
425425
for opts: MatchingOptions
426-
) -> (@escaping ScalarPredicate) -> MEProgram<String>.ConsumeFunction {
426+
) -> (@escaping ScalarPredicate) -> MEProgram.ConsumeFunction {
427427
opts.semanticLevel == .graphemeCluster
428428
? consumeCharacterWithLeadingScalar
429429
: consumeScalar
@@ -432,11 +432,11 @@ func consumeFunction(
432432
extension AST.Atom.CharacterProperty {
433433
func generateConsumer(
434434
_ opts: MatchingOptions
435-
) throws -> MEProgram<String>.ConsumeFunction {
435+
) throws -> MEProgram.ConsumeFunction {
436436
// Handle inversion for us, albeit not efficiently
437437
func invert(
438-
_ p: @escaping MEProgram<String>.ConsumeFunction
439-
) -> MEProgram<String>.ConsumeFunction {
438+
_ p: @escaping MEProgram.ConsumeFunction
439+
) -> MEProgram.ConsumeFunction {
440440
return { input, bounds in
441441
if p(input, bounds) != nil { return nil }
442442

@@ -448,7 +448,7 @@ extension AST.Atom.CharacterProperty {
448448
}
449449

450450
let consume = consumeFunction(for: opts)
451-
let preInversion: MEProgram<String>.ConsumeFunction =
451+
let preInversion: MEProgram.ConsumeFunction =
452452
try {
453453
switch kind {
454454
// TODO: is this modeled differently?
@@ -533,7 +533,7 @@ extension Unicode.BinaryProperty {
533533
// FIXME: Semantic level, vet for precise defs
534534
func generateConsumer(
535535
_ opts: MatchingOptions
536-
) throws -> MEProgram<String>.ConsumeFunction {
536+
) throws -> MEProgram.ConsumeFunction {
537537
let consume = consumeFunction(for: opts)
538538

539539
// Note if you implement support for any of the below, you need to adjust
@@ -701,7 +701,7 @@ extension Unicode.POSIXProperty {
701701
// FIXME: Semantic level, vet for precise defs
702702
func generateConsumer(
703703
_ opts: MatchingOptions
704-
) -> MEProgram<String>.ConsumeFunction {
704+
) -> MEProgram.ConsumeFunction {
705705
let consume = consumeFunction(for: opts)
706706

707707
// FIXME: modes, etc
@@ -749,7 +749,7 @@ extension Unicode.ExtendedGeneralCategory {
749749
// FIXME: Semantic level
750750
func generateConsumer(
751751
_ opts: MatchingOptions
752-
) throws -> MEProgram<String>.ConsumeFunction {
752+
) throws -> MEProgram.ConsumeFunction {
753753
let consume = consumeFunction(for: opts)
754754

755755
switch self {

Sources/_StringProcessing/Engine/Consume.swift

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,8 @@ var checkComments = true
1313

1414
extension Engine {
1515
func makeProcessor(
16-
input: Input, bounds: Range<Input.Index>, matchMode: MatchMode
17-
) -> Processor<Input> {
16+
input: String, bounds: Range<String.Index>, matchMode: MatchMode
17+
) -> Processor {
1818
Processor(
1919
program: program,
2020
input: input,
@@ -24,7 +24,7 @@ extension Engine {
2424
}
2525
}
2626

27-
extension Processor where Input == String {
27+
extension Processor {
2828
// TODO: Should we throw here?
2929
mutating func consume() -> Input.Index? {
3030
while true {

Sources/_StringProcessing/Engine/Engine.swift

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,9 +11,9 @@
1111

1212
// Currently, engine binds the type and consume binds an instance.
1313
// But, we can play around with this.
14-
struct Engine<Input: BidirectionalCollection> where Input.Element: Hashable {
14+
struct Engine {
1515

16-
var program: MEProgram<Input>
16+
var program: MEProgram
1717

1818
// TODO: Pre-allocated register banks
1919

@@ -25,7 +25,7 @@ struct Engine<Input: BidirectionalCollection> where Input.Element: Hashable {
2525
}
2626

2727
init(
28-
_ program: MEProgram<Input>,
28+
_ program: MEProgram,
2929
enableTracing: Bool? = nil
3030
) {
3131
var program = program

Sources/_StringProcessing/Engine/MEBuilder.swift

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111

1212
@_implementationOnly import _RegexParser // For errors
1313

14-
extension MEProgram where Input.Element: Hashable {
14+
extension MEProgram {
1515
struct Builder {
1616
var instructions: [Instruction] = []
1717

@@ -71,7 +71,7 @@ extension MEProgram.Builder {
7171
// TODO: We want a better strategy for fixups, leaving
7272
// the operand in a different form isn't great...
7373

74-
init<S: Sequence>(staticElements: S) where S.Element == Input.Element {
74+
init<S: Sequence>(staticElements: S) where S.Element == Character {
7575
staticElements.forEach { elements.store($0) }
7676
}
7777

@@ -183,14 +183,14 @@ extension MEProgram.Builder {
183183
instructions.append(.init(.advance, .init(distance: n)))
184184
}
185185

186-
mutating func buildMatch(_ e: Input.Element) {
186+
mutating func buildMatch(_ e: Character) {
187187
instructions.append(.init(
188188
.match, .init(element: elements.store(e))))
189189
}
190190

191191
mutating func buildMatchSequence<S: Sequence>(
192192
_ s: S
193-
) where S.Element == Input.Element {
193+
) where S.Element == Character {
194194
instructions.append(.init(
195195
.matchSequence,
196196
.init(sequence: sequences.store(.init(s)))))
@@ -219,7 +219,7 @@ extension MEProgram.Builder {
219219
}
220220

221221
mutating func buildAssert(
222-
_ e: Input.Element, into cond: BoolRegister
222+
_ e: Character, into cond: BoolRegister
223223
) {
224224
instructions.append(.init(.assertion, .init(
225225
element: elements.store(e), bool: cond)))

Sources/_StringProcessing/Engine/MECapture.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,7 @@ extension Processor._StoredCapture: CustomStringConvertible {
9595
}
9696

9797
struct MECaptureList {
98-
var values: Array<Processor<String>._StoredCapture>
98+
var values: Array<Processor._StoredCapture>
9999
var referencedCaptureOffsets: [ReferenceID: Int]
100100

101101
func latestUntyped(from input: String) -> Array<Substring?> {

Sources/_StringProcessing/Engine/MEProgram.swift

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,12 +11,14 @@
1111

1212
@_implementationOnly import _RegexParser
1313

14-
struct MEProgram<Input: BidirectionalCollection> where Input.Element: Equatable {
14+
struct MEProgram {
15+
typealias Input = String
16+
1517
typealias ConsumeFunction = (Input, Range<Input.Index>) -> Input.Index?
1618
typealias AssertionFunction =
1719
(Input, Input.Index, Range<Input.Index>) throws -> Bool
1820
typealias TransformFunction =
19-
(Input, Processor<Input>._StoredCapture) throws -> Any?
21+
(Input, Processor._StoredCapture) throws -> Any?
2022
typealias MatcherFunction =
2123
(Input, Input.Index, Range<Input.Index>) throws -> (Input.Index, Any)?
2224

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,6 @@ enum MatchMode {
1414
case partialFromFront
1515
}
1616

17-
typealias Program = MEProgram<String>
18-
1917
/// A concrete CU. Somehow will run the concrete logic and
2018
/// feed stuff back to generic code
2119
struct Controller {
@@ -26,9 +24,8 @@ struct Controller {
2624
}
2725
}
2826

29-
struct Processor<
30-
Input: BidirectionalCollection
31-
> where Input.Element: Equatable { // maybe Hashable?
27+
struct Processor {
28+
typealias Input = String
3229
typealias Element = Input.Element
3330

3431
let input: Input
@@ -75,7 +72,7 @@ extension Processor {
7572

7673
extension Processor {
7774
init(
78-
program: MEProgram<Input>,
75+
program: MEProgram,
7976
input: Input,
8077
bounds: Range<Position>,
8178
matchMode: MatchMode,

Sources/_StringProcessing/Engine/Registers.swift

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -29,15 +29,15 @@ extension Processor {
2929
// TODO: Degenericize Processor and store Strings
3030
var sequences: [[Element]] = []
3131

32-
var consumeFunctions: [MEProgram<Input>.ConsumeFunction]
32+
var consumeFunctions: [MEProgram.ConsumeFunction]
3333

34-
var assertionFunctions: [MEProgram<Input>.AssertionFunction]
34+
var assertionFunctions: [MEProgram.AssertionFunction]
3535

3636
// Captured-value constructors
37-
var transformFunctions: [MEProgram<Input>.TransformFunction]
37+
var transformFunctions: [MEProgram.TransformFunction]
3838

3939
// Value-constructing matchers
40-
var matcherFunctions: [MEProgram<Input>.MatcherFunction]
40+
var matcherFunctions: [MEProgram.MatcherFunction]
4141

4242
// currently, these are for comments and abort messages
4343
var strings: [String]
@@ -58,6 +58,8 @@ extension Processor {
5858
}
5959

6060
extension Processor.Registers {
61+
typealias Input = String
62+
6163
subscript(_ i: StringRegister) -> String {
6264
strings[i.rawValue]
6365
}
@@ -85,24 +87,24 @@ extension Processor.Registers {
8587
subscript(_ i: ElementRegister) -> Input.Element {
8688
elements[i.rawValue]
8789
}
88-
subscript(_ i: ConsumeFunctionRegister) -> MEProgram<Input>.ConsumeFunction {
90+
subscript(_ i: ConsumeFunctionRegister) -> MEProgram.ConsumeFunction {
8991
consumeFunctions[i.rawValue]
9092
}
91-
subscript(_ i: AssertionFunctionRegister) -> MEProgram<Input>.AssertionFunction {
93+
subscript(_ i: AssertionFunctionRegister) -> MEProgram.AssertionFunction {
9294
assertionFunctions[i.rawValue]
9395
}
94-
subscript(_ i: TransformRegister) -> MEProgram<Input>.TransformFunction {
96+
subscript(_ i: TransformRegister) -> MEProgram.TransformFunction {
9597
transformFunctions[i.rawValue]
9698
}
97-
subscript(_ i: MatcherRegister) -> MEProgram<Input>.MatcherFunction {
99+
subscript(_ i: MatcherRegister) -> MEProgram.MatcherFunction {
98100
matcherFunctions[i.rawValue]
99101
}
100102
}
101103

102104
extension Processor.Registers {
103105
init(
104-
_ program: MEProgram<Input>,
105-
_ sentinel: Input.Index
106+
_ program: MEProgram,
107+
_ sentinel: String.Index
106108
) {
107109
let info = program.registerInfo
108110

Sources/_StringProcessing/Engine/Tracing.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ extension Instruction.Payload: CustomStringConvertible {
5454
}
5555

5656
extension Processor.SavePoint {
57-
func describe(in input: Input) -> String {
57+
func describe(in input: String) -> String {
5858
let posStr: String
5959
if let p = self.pos {
6060
posStr = "\(input.distance(from: input.startIndex, to: p))"

0 commit comments

Comments
 (0)