Skip to content

Commit 612f104

Browse files
authored
DSLTree (#134)
DSLTree, a shared representation for compilation and printing. Bug fix in matching engine for reading subsequences out of bounds. DSLTree hooked up to legacy VMs.
1 parent c708963 commit 612f104

23 files changed

+3773
-2783
lines changed

Package.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ let package = Package(
6868
dependencies: [
6969
.product(name: "ArgumentParser", package: "swift-argument-parser"),
7070
"_MatchingEngine",
71+
"_StringProcessing"
7172
]),
7273

7374
// MARK: Exercises

Sources/PatternConverter/PatternConverter.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import ArgumentParser
1515
import _MatchingEngine
16+
import _StringProcessing
1617

1718
@main
1819
struct PatternConverter: ParsableCommand {

Sources/VariadicsGenerator/VariadicsGenerator.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -251,13 +251,13 @@ struct VariadicsGenerator: ParsableCommand {
251251
output(" init(")
252252
outputForEach(0..<arity, separator: ", ") { "_ x\($0): T\($0)" }
253253
output(") {\n")
254-
output(" \(patternProtocolRequirementName) = .init(ast: concat(\n ")
254+
output(" \(patternProtocolRequirementName) = .init(node: .concatenation([\n ")
255255
outputForEach(
256256
0..<arity, separator: ", ", lineTerminator: ""
257257
) { i in
258-
"x\(i).\(patternProtocolRequirementName).ast.root"
258+
"x\(i).\(patternProtocolRequirementName).root"
259259
}
260-
output("))\n")
260+
output("]))\n")
261261
output(" }\n}\n\n")
262262

263263
// Emit concatenation builders.

Sources/_MatchingEngine/Engine/Processor.swift

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,12 @@ extension Processor {
9393
}
9494

9595
extension Processor {
96+
var slice: Input.SubSequence {
97+
// TODO: Should we wholesale switch to slices, or
98+
// does that depend on options for some anchors?
99+
input[bounds]
100+
}
101+
96102
// Advance in our input
97103
//
98104
// Returns whether the advance succeeded. On failure, our
@@ -125,7 +131,7 @@ extension Processor {
125131
currentPosition < end ? input[currentPosition] : nil
126132
}
127133
func load(count: Int) -> Input.SubSequence? {
128-
let slice = input[currentPosition...].prefix(count)
134+
let slice = self.slice[currentPosition...].prefix(count)
129135
guard slice.count == count else { return nil }
130136
return slice
131137
}

Sources/_MatchingEngine/Regex/AST/AST.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ extension AST {
3333
/// A node in the regex AST.
3434
@frozen
3535
public indirect enum Node:
36-
Hashable/*, _ASTPrintable ASTValue, ASTAction*/
36+
Hashable, _TreeNode //, _ASTPrintable ASTValue, ASTAction
3737
{
3838
/// ... | ... | ...
3939
case alternation(Alternation)
@@ -94,7 +94,7 @@ extension AST.Node {
9494
}
9595
}
9696

97-
func `as`<T: _ASTNode>(_ t: T.Type = T.self) -> T? {
97+
public func `as`<T: _ASTNode>(_ t: T.Type = T.self) -> T? {
9898
_associatedValue as? T
9999
}
100100

Sources/_MatchingEngine/Regex/AST/ASTProtocols.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
// MARK: - AST parent/child
2121

22-
protocol _ASTNode: _ASTPrintable {
22+
public protocol _ASTNode: _ASTPrintable {
2323
var location: SourceLocation { get }
2424
}
2525
extension _ASTNode {
@@ -41,7 +41,7 @@ extension AST.Quantification: _ASTParent {
4141
var children: [AST.Node] { [child] }
4242
}
4343
extension AST.AbsentFunction: _ASTParent {
44-
var children: [AST.Node] {
44+
public var children: [AST.Node] {
4545
switch kind {
4646
case .repeater(let a), .stopper(let a): return [a]
4747
case .expression(let a, _, let c): return [a, c]

Sources/_MatchingEngine/Regex/AST/Group.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,12 +112,12 @@ extension AST.Group.Kind {
112112
}
113113
}
114114

115-
extension AST.Group {
115+
extension AST.Group.Kind {
116116
/// If this group is a lookaround assertion, return its direction
117117
/// and whether it is positive or negative. Otherwise returns
118118
/// `nil`.
119119
public var lookaroundKind: (forwards: Bool, positive: Bool)? {
120-
switch self.kind.value {
120+
switch self {
121121
case .lookahead: return (true, true)
122122
case .negativeLookahead: return (true, false)
123123
case .lookbehind: return (false, true)

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 126 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -25,71 +25,138 @@ public enum CaptureStructure: Equatable {
2525
}
2626
}
2727

28+
extension CaptureStructure {
29+
public init<C: Collection>(
30+
alternating children: C
31+
) where C.Element: _TreeNode {
32+
assert(children.count > 1)
33+
self = children
34+
.map(\.captureStructure)
35+
.reduce(.empty, +)
36+
.map(CaptureStructure.optional)
37+
}
38+
public init<C: Collection>(
39+
concatenating children: C
40+
) where C.Element: _TreeNode {
41+
self = children.map(\.captureStructure).reduce(.empty, +)
42+
}
43+
44+
public init<T: _TreeNode>(
45+
grouping child: T, as kind: AST.Group.Kind
46+
) {
47+
let innerCaptures = child.captureStructure
48+
switch kind {
49+
case .capture:
50+
self = .atom() + innerCaptures
51+
case .namedCapture(let name):
52+
self = .atom(name: name.value) + innerCaptures
53+
case .balancedCapture(let b):
54+
self = .atom(name: b.name?.value) + innerCaptures
55+
default:
56+
precondition(!kind.isCapturing)
57+
self = innerCaptures
58+
}
59+
}
60+
61+
public init<T: _TreeNode>(
62+
grouping child: T,
63+
as kind: AST.Group.Kind,
64+
withTransform transform: CaptureTransform
65+
) {
66+
let innerCaptures = child.captureStructure
67+
switch kind {
68+
case .capture:
69+
self = .atom(type: AnyType(transform.resultType)) + innerCaptures
70+
case .namedCapture(let name):
71+
self = .atom(name: name.value, type: AnyType(transform.resultType))
72+
+ innerCaptures
73+
default:
74+
self = innerCaptures
75+
}
76+
}
77+
78+
// TODO: We'll likely want/need a generalization of
79+
// conditional's condition kind.
80+
public init<T: _TreeNode>(
81+
condition: AST.Conditional.Condition.Kind,
82+
trueBranch: T,
83+
falseBranch: T
84+
) {
85+
// A conditional's capture structure is effectively that of an alternation
86+
// between the true and false branches. However the condition may also
87+
// have captures in the case of a group condition.
88+
var captures = CaptureStructure.empty
89+
switch condition {
90+
case .group(let g):
91+
captures = captures + AST.Node.group(g).captureStructure
92+
default:
93+
break
94+
}
95+
let branchCaptures = trueBranch.captureStructure +
96+
falseBranch.captureStructure
97+
self = captures + branchCaptures.map(
98+
CaptureStructure.optional)
99+
}
100+
101+
public init<T: _TreeNode>(
102+
quantifying child: T, amount: AST.Quantification.Amount
103+
) {
104+
self = child.captureStructure.map(
105+
amount == .zeroOrOne
106+
? CaptureStructure.optional
107+
: CaptureStructure.array)
108+
}
109+
110+
// TODO: Will need to adjust for DSLTree support, and
111+
// "absent" isn't the best name for these.
112+
public init(
113+
absent kind: AST.AbsentFunction.Kind
114+
) {
115+
// Only the child of an expression absent function is relevant, as the
116+
// other expressions don't actually get matched against.
117+
switch kind {
118+
case .expression(_, _, let child):
119+
self = child.captureStructure
120+
case .clearer, .repeater, .stopper:
121+
self = .empty
122+
}
123+
}
124+
125+
}
126+
28127
extension AST.Node {
29128
public var captureStructure: CaptureStructure {
30129
// Note: This implementation could be more optimized.
31130
switch self {
32-
case .alternation(let alternation):
33-
assert(alternation.children.count > 1)
34-
return alternation.children
35-
.map(\.captureStructure)
36-
.reduce(.empty, +)
37-
.map(CaptureStructure.optional)
38-
case .concatenation(let concatenation):
39-
return concatenation.children.map(\.captureStructure).reduce(.empty, +)
40-
case .group(let group):
41-
let innerCaptures = group.child.captureStructure
42-
switch group.kind.value {
43-
case .capture:
44-
return .atom() + innerCaptures
45-
case .namedCapture(let name):
46-
return .atom(name: name.value) + innerCaptures
47-
case .balancedCapture(let b):
48-
return .atom(name: b.name?.value) + innerCaptures
49-
default:
50-
precondition(!group.kind.value.isCapturing)
51-
return innerCaptures
52-
}
53-
case .groupTransform(let group, let transform):
54-
let innerCaptures = group.child.captureStructure
55-
switch group.kind.value {
56-
case .capture:
57-
return .atom(type: AnyType(transform.resultType)) + innerCaptures
58-
case .namedCapture(let name):
59-
return .atom(name: name.value, type: AnyType(transform.resultType))
60-
+ innerCaptures
61-
default:
62-
return innerCaptures
63-
}
131+
case let .alternation(a):
132+
return CaptureStructure(alternating: a.children)
133+
134+
case let .concatenation(c):
135+
return CaptureStructure(concatenating: c.children)
136+
137+
case let .group(g):
138+
return CaptureStructure(
139+
grouping: g.child, as: g.kind.value)
140+
141+
case .groupTransform(let g, let transform):
142+
return CaptureStructure(
143+
grouping: g.child,
144+
as: g.kind.value,
145+
withTransform: transform)
146+
64147
case .conditional(let c):
65-
// A conditional's capture structure is effectively that of an alternation
66-
// between the true and false branches. However the condition may also
67-
// have captures in the case of a group condition.
68-
var captures = CaptureStructure.empty
69-
switch c.condition.kind {
70-
case .group(let g):
71-
captures = captures + AST.Node.group(g).captureStructure
72-
default:
73-
break
74-
}
75-
let branchCaptures = c.trueBranch.captureStructure +
76-
c.falseBranch.captureStructure
77-
return captures + branchCaptures.map(CaptureStructure.optional)
78-
79-
case .quantification(let quantification):
80-
return quantification.child.captureStructure.map(
81-
quantification.amount.value == .zeroOrOne
82-
? CaptureStructure.optional
83-
: CaptureStructure.array)
148+
return CaptureStructure(
149+
condition: c.condition.kind,
150+
trueBranch: c.trueBranch,
151+
falseBranch: c.falseBranch)
152+
153+
case .quantification(let q):
154+
return CaptureStructure(
155+
quantifying: q.child, amount: q.amount.value)
156+
84157
case .absentFunction(let abs):
85-
// Only the child of an expression absent function is relevant, as the
86-
// other expressions don't actually get matched against.
87-
switch abs.kind {
88-
case .expression(_, _, let child):
89-
return child.captureStructure
90-
case .clearer, .repeater, .stopper:
91-
return .empty
92-
}
158+
return CaptureStructure(absent: abs.kind)
159+
93160
case .quote, .trivia, .atom, .customCharacterClass, .empty:
94161
return .empty
95162
}

0 commit comments

Comments
 (0)