Skip to content

De-genericize processor, engine, etc. #502

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Sources/_StringProcessing/Algorithms/Matching/Matches.swift
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,12 @@ extension BidirectionalCollection where SubSequence == Substring {
public func matches<Output>(
  of r: some RegexComponent<Output>
) -> [Regex<Output>.Match] {
  // Collects every match yielded by `_matches(of: r)` into an array, in
  // iteration order.
  //
  // FIXME: Array init calls count, which double-executes the regex :-(
  // FIXME: just return some Collection<Regex<Output>.Match>
  //
  // Because of the first FIXME, the matches are appended one at a time
  // rather than built with `Array(_matches(of: r))`; the match collection is
  // created and iterated exactly once here.
  var result: [Regex<Output>.Match] = []
  for match in _matches(of: r) {
    result.append(match)
  }
  return result
}
}
4 changes: 2 additions & 2 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
extension Compiler {
struct ByteCodeGen {
var options: MatchingOptions
var builder = Program.Builder()
var builder = MEProgram.Builder()
/// A Boolean indicating whether the first matchable atom has been emitted.
/// This is used to determine whether to apply initial options.
var hasEmittedFirstMatchableAtom = false
Expand All @@ -16,7 +16,7 @@ extension Compiler {
}

extension Compiler.ByteCodeGen {
mutating func emitRoot(_ root: DSLTree.Node) throws -> Program {
mutating func emitRoot(_ root: DSLTree.Node) throws -> MEProgram {
// The whole match (`.0` element of output) is equivalent to an implicit
// capture over the entire regex.
try emitNode(.capture(name: nil, reference: nil, root))
Expand Down
2 changes: 1 addition & 1 deletion Sources/_StringProcessing/Compiler.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class Compiler {
self.tree = tree
}

__consuming func emit() throws -> Program {
__consuming func emit() throws -> MEProgram {
// TODO: Handle global options
var codegen = ByteCodeGen(
options: options, captureList: tree.captureList
Expand Down
34 changes: 17 additions & 17 deletions Sources/_StringProcessing/ConsumerInterface.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ extension DSLTree.Node {
/// the front of an input range
func generateConsumer(
_ opts: MatchingOptions
) throws -> MEProgram<String>.ConsumeFunction? {
) throws -> MEProgram.ConsumeFunction? {
switch self {
case .atom(let a):
return try a.generateConsumer(opts)
Expand Down Expand Up @@ -56,7 +56,7 @@ extension DSLTree.Atom {
// top-level nodes, but it's also invoked for `.atom` members of a custom CC
func generateConsumer(
_ opts: MatchingOptions
) throws -> MEProgram<String>.ConsumeFunction? {
) throws -> MEProgram.ConsumeFunction? {
let isCaseInsensitive = opts.isCaseInsensitive

switch self {
Expand Down Expand Up @@ -142,7 +142,7 @@ extension String {
}
}

func consumeName(_ name: String, opts: MatchingOptions) -> MEProgram<String>.ConsumeFunction {
func consumeName(_ name: String, opts: MatchingOptions) -> MEProgram.ConsumeFunction {
let consume = consumeFunction(for: opts)
return consume(propertyScalarPredicate {
// FIXME: name aliases not covered by $0.nameAlias are missed
Expand Down Expand Up @@ -180,7 +180,7 @@ extension AST.Atom {

func generateConsumer(
_ opts: MatchingOptions
) throws -> MEProgram<String>.ConsumeFunction? {
) throws -> MEProgram.ConsumeFunction? {
// TODO: Wean ourselves off of this type...
if let cc = self.characterClass?.withMatchLevel(
opts.matchLevel
Expand Down Expand Up @@ -237,7 +237,7 @@ extension AST.Atom {
extension DSLTree.CustomCharacterClass.Member {
func generateConsumer(
_ opts: MatchingOptions
) throws -> MEProgram<String>.ConsumeFunction {
) throws -> MEProgram.ConsumeFunction {
switch self {
case let .atom(a):
guard let c = try a.generateConsumer(opts) else {
Expand Down Expand Up @@ -344,7 +344,7 @@ extension DSLTree.CustomCharacterClass.Member {
extension DSLTree.CustomCharacterClass {
func generateConsumer(
_ opts: MatchingOptions
) throws -> MEProgram<String>.ConsumeFunction {
) throws -> MEProgram.ConsumeFunction {
// NOTE: Easy way to implement, obviously not performant
let consumers = try members.map {
try $0.generateConsumer(opts)
Expand Down Expand Up @@ -386,7 +386,7 @@ private func propertyScalarPredicate(_ p: @escaping (Unicode.Scalar.Properties)

func consumeScalar(
_ p: @escaping ScalarPredicate
) -> MEProgram<String>.ConsumeFunction {
) -> MEProgram.ConsumeFunction {
{ input, bounds in
// TODO: bounds check?
let curIdx = bounds.lowerBound
Expand All @@ -399,7 +399,7 @@ func consumeScalar(
}
func consumeCharacterWithLeadingScalar(
_ p: @escaping ScalarPredicate
) -> MEProgram<String>.ConsumeFunction {
) -> MEProgram.ConsumeFunction {
{ input, bounds in
let curIdx = bounds.lowerBound
if p(input[curIdx].unicodeScalars.first!) {
Expand All @@ -410,7 +410,7 @@ func consumeCharacterWithLeadingScalar(
}
func consumeCharacterWithSingleScalar(
_ p: @escaping ScalarPredicate
) -> MEProgram<String>.ConsumeFunction {
) -> MEProgram.ConsumeFunction {
{ input, bounds in
let curIdx = bounds.lowerBound

Expand All @@ -423,7 +423,7 @@ func consumeCharacterWithSingleScalar(

func consumeFunction(
for opts: MatchingOptions
) -> (@escaping ScalarPredicate) -> MEProgram<String>.ConsumeFunction {
) -> (@escaping ScalarPredicate) -> MEProgram.ConsumeFunction {
opts.semanticLevel == .graphemeCluster
? consumeCharacterWithLeadingScalar
: consumeScalar
Expand All @@ -432,11 +432,11 @@ func consumeFunction(
extension AST.Atom.CharacterProperty {
func generateConsumer(
_ opts: MatchingOptions
) throws -> MEProgram<String>.ConsumeFunction {
) throws -> MEProgram.ConsumeFunction {
// Handle inversion for us, albeit not efficiently
func invert(
_ p: @escaping MEProgram<String>.ConsumeFunction
) -> MEProgram<String>.ConsumeFunction {
_ p: @escaping MEProgram.ConsumeFunction
) -> MEProgram.ConsumeFunction {
return { input, bounds in
if p(input, bounds) != nil { return nil }

Expand All @@ -448,7 +448,7 @@ extension AST.Atom.CharacterProperty {
}

let consume = consumeFunction(for: opts)
let preInversion: MEProgram<String>.ConsumeFunction =
let preInversion: MEProgram.ConsumeFunction =
try {
switch kind {
// TODO: is this modeled differently?
Expand Down Expand Up @@ -533,7 +533,7 @@ extension Unicode.BinaryProperty {
// FIXME: Semantic level, vet for precise defs
func generateConsumer(
_ opts: MatchingOptions
) throws -> MEProgram<String>.ConsumeFunction {
) throws -> MEProgram.ConsumeFunction {
let consume = consumeFunction(for: opts)

// Note if you implement support for any of the below, you need to adjust
Expand Down Expand Up @@ -701,7 +701,7 @@ extension Unicode.POSIXProperty {
// FIXME: Semantic level, vet for precise defs
func generateConsumer(
_ opts: MatchingOptions
) -> MEProgram<String>.ConsumeFunction {
) -> MEProgram.ConsumeFunction {
let consume = consumeFunction(for: opts)

// FIXME: modes, etc
Expand Down Expand Up @@ -749,7 +749,7 @@ extension Unicode.ExtendedGeneralCategory {
// FIXME: Semantic level
func generateConsumer(
_ opts: MatchingOptions
) throws -> MEProgram<String>.ConsumeFunction {
) throws -> MEProgram.ConsumeFunction {
let consume = consumeFunction(for: opts)

switch self {
Expand Down
6 changes: 3 additions & 3 deletions Sources/_StringProcessing/Engine/Consume.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ var checkComments = true

extension Engine {
func makeProcessor(
input: Input, bounds: Range<Input.Index>, matchMode: MatchMode
) -> Processor<Input> {
input: String, bounds: Range<String.Index>, matchMode: MatchMode
) -> Processor {
Processor(
program: program,
input: input,
Expand All @@ -24,7 +24,7 @@ extension Engine {
}
}

extension Processor where Input == String {
extension Processor {
// TODO: Should we throw here?
mutating func consume() -> Input.Index? {
while true {
Expand Down
6 changes: 3 additions & 3 deletions Sources/_StringProcessing/Engine/Engine.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@

// Currently, engine binds the type and consume binds an instance.
// But, we can play around with this.
struct Engine<Input: BidirectionalCollection> where Input.Element: Hashable {
struct Engine {

var program: MEProgram<Input>
var program: MEProgram

// TODO: Pre-allocated register banks

Expand All @@ -25,7 +25,7 @@ struct Engine<Input: BidirectionalCollection> where Input.Element: Hashable {
}

init(
_ program: MEProgram<Input>,
_ program: MEProgram,
enableTracing: Bool? = nil
) {
var program = program
Expand Down
10 changes: 5 additions & 5 deletions Sources/_StringProcessing/Engine/MEBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

@_implementationOnly import _RegexParser // For errors

extension MEProgram where Input.Element: Hashable {
extension MEProgram {
struct Builder {
var instructions: [Instruction] = []

Expand Down Expand Up @@ -71,7 +71,7 @@ extension MEProgram.Builder {
// TODO: We want a better strategy for fixups, leaving
// the operand in a different form isn't great...

init<S: Sequence>(staticElements: S) where S.Element == Input.Element {
init<S: Sequence>(staticElements: S) where S.Element == Character {
staticElements.forEach { elements.store($0) }
}

Expand Down Expand Up @@ -183,14 +183,14 @@ extension MEProgram.Builder {
instructions.append(.init(.advance, .init(distance: n)))
}

mutating func buildMatch(_ e: Input.Element) {
mutating func buildMatch(_ e: Character) {
instructions.append(.init(
.match, .init(element: elements.store(e))))
}

mutating func buildMatchSequence<S: Sequence>(
_ s: S
) where S.Element == Input.Element {
) where S.Element == Character {
instructions.append(.init(
.matchSequence,
.init(sequence: sequences.store(.init(s)))))
Expand Down Expand Up @@ -219,7 +219,7 @@ extension MEProgram.Builder {
}

mutating func buildAssert(
_ e: Input.Element, into cond: BoolRegister
_ e: Character, into cond: BoolRegister
) {
instructions.append(.init(.assertion, .init(
element: elements.store(e), bool: cond)))
Expand Down
2 changes: 1 addition & 1 deletion Sources/_StringProcessing/Engine/MECapture.swift
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ extension Processor._StoredCapture: CustomStringConvertible {
}

struct MECaptureList {
var values: Array<Processor<String>._StoredCapture>
var values: Array<Processor._StoredCapture>
var referencedCaptureOffsets: [ReferenceID: Int]

func latestUntyped(from input: String) -> Array<Substring?> {
Expand Down
6 changes: 4 additions & 2 deletions Sources/_StringProcessing/Engine/MEProgram.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@

@_implementationOnly import _RegexParser

struct MEProgram<Input: BidirectionalCollection> where Input.Element: Equatable {
struct MEProgram {
typealias Input = String

typealias ConsumeFunction = (Input, Range<Input.Index>) -> Input.Index?
typealias AssertionFunction =
(Input, Input.Index, Range<Input.Index>) throws -> Bool
typealias TransformFunction =
(Input, Processor<Input>._StoredCapture) throws -> Any?
(Input, Processor._StoredCapture) throws -> Any?
typealias MatcherFunction =
(Input, Input.Index, Range<Input.Index>) throws -> (Input.Index, Any)?

Expand Down
9 changes: 3 additions & 6 deletions Sources/_StringProcessing/Engine/Processor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ enum MatchMode {
case partialFromFront
}

typealias Program = MEProgram<String>

/// A concrete CU. Somehow will run the concrete logic and
/// feed stuff back to generic code
struct Controller {
Expand All @@ -26,9 +24,8 @@ struct Controller {
}
}

struct Processor<
Input: BidirectionalCollection
> where Input.Element: Equatable { // maybe Hashable?
struct Processor {
typealias Input = String
typealias Element = Input.Element

let input: Input
Expand Down Expand Up @@ -75,7 +72,7 @@ extension Processor {

extension Processor {
init(
program: MEProgram<Input>,
program: MEProgram,
input: Input,
bounds: Range<Position>,
matchMode: MatchMode,
Expand Down
22 changes: 12 additions & 10 deletions Sources/_StringProcessing/Engine/Registers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ extension Processor {
// TODO: Processor is no longer generic (Input is String); store Strings here
var sequences: [[Element]] = []

var consumeFunctions: [MEProgram<Input>.ConsumeFunction]
var consumeFunctions: [MEProgram.ConsumeFunction]

var assertionFunctions: [MEProgram<Input>.AssertionFunction]
var assertionFunctions: [MEProgram.AssertionFunction]

// Captured-value constructors
var transformFunctions: [MEProgram<Input>.TransformFunction]
var transformFunctions: [MEProgram.TransformFunction]

// Value-constructing matchers
var matcherFunctions: [MEProgram<Input>.MatcherFunction]
var matcherFunctions: [MEProgram.MatcherFunction]

// currently, these are for comments and abort messages
var strings: [String]
Expand All @@ -58,6 +58,8 @@ extension Processor {
}

extension Processor.Registers {
typealias Input = String

subscript(_ i: StringRegister) -> String {
strings[i.rawValue]
}
Expand Down Expand Up @@ -85,24 +87,24 @@ extension Processor.Registers {
subscript(_ i: ElementRegister) -> Input.Element {
elements[i.rawValue]
}
subscript(_ i: ConsumeFunctionRegister) -> MEProgram<Input>.ConsumeFunction {
subscript(_ i: ConsumeFunctionRegister) -> MEProgram.ConsumeFunction {
consumeFunctions[i.rawValue]
}
subscript(_ i: AssertionFunctionRegister) -> MEProgram<Input>.AssertionFunction {
subscript(_ i: AssertionFunctionRegister) -> MEProgram.AssertionFunction {
assertionFunctions[i.rawValue]
}
subscript(_ i: TransformRegister) -> MEProgram<Input>.TransformFunction {
subscript(_ i: TransformRegister) -> MEProgram.TransformFunction {
transformFunctions[i.rawValue]
}
subscript(_ i: MatcherRegister) -> MEProgram<Input>.MatcherFunction {
subscript(_ i: MatcherRegister) -> MEProgram.MatcherFunction {
matcherFunctions[i.rawValue]
}
}

extension Processor.Registers {
init(
_ program: MEProgram<Input>,
_ sentinel: Input.Index
_ program: MEProgram,
_ sentinel: String.Index
) {
let info = program.registerInfo

Expand Down
2 changes: 1 addition & 1 deletion Sources/_StringProcessing/Engine/Tracing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ extension Instruction.Payload: CustomStringConvertible {
}

extension Processor.SavePoint {
func describe(in input: Input) -> String {
func describe(in input: String) -> String {
let posStr: String
if let p = self.pos {
posStr = "\(input.distance(from: input.startIndex, to: p))"
Expand Down
Loading