Skip to content

Commit 42f1a66

Browse files
committed
DSLTree, a shared representation for compilation and printing
1 parent e719d6d commit 42f1a66

File tree

4 files changed

+450
-107
lines changed

4 files changed

+450
-107
lines changed

Sources/_MatchingEngine/Regex/AST/AST.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ extension AST {
3333
/// A node in the regex AST.
3434
@frozen
3535
public indirect enum Node:
36-
Hashable/*, _ASTPrintable ASTValue, ASTAction*/
36+
Hashable, _TreeNode //, _ASTPrintable ASTValue, ASTAction
3737
{
3838
/// ... | ... | ...
3939
case alternation(Alternation)
Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
/// Namespace for the DSL tree types (`Node`, `CustomCharacterClass`, `Atom`),
/// which are declared in extensions of this type.
struct DSLTree {}
3+
4+
extension DSLTree {
  /// A node of the DSL tree.
  ///
  /// Several cases store child `Node`s directly, hence the `indirect`
  /// storage.
  indirect enum Node: _TreeNode {
    /// ... | ... | ...
    case alternation([Node])

    /// ... ...
    case concatenation([Node])

    /// (...)
    case group(AST.Group.Kind, Node)

    /// (?(cond) true-branch | false-branch)
    case conditional(AST.Conditional.Condition.Kind, Node, Node)

    /// A child node together with its quantification amount and kind.
    case quantification(
      AST.Quantification.Amount, AST.Quantification.Kind, Node)

    /// A custom character class in its DSL tree form.
    case customCharacterClass(CustomCharacterClass)

    /// A single atom in its DSL tree form.
    case atom(Atom)

    /// Comments, non-semantic whitespace, etc.
    // TODO: Do we want this? Could be interesting
    case trivia(String)

    // TODO: Probably some atoms, built-ins, etc.

    /// A node with no content.
    case empty

    /// A run of literal text.
    case stringLiteral(String)

    /// An embedded literal.
    case regexLiteral(AST)

    // MARK: - Tree conversions

    /// The target of AST conversion.
    ///
    /// Keeps the original AST around for rich syntactic and source
    /// information.
    case convertedRegexLiteral(Node, AST)

    /// Fall-back for when conversion fails.
    case unconvertedRegexLiteral(AST)

    // MARK: - Extensibility points

    /// A capturing group (TODO: is it?) with a transformation function.
    case groupTransform(AST.Group.Kind, Node, CaptureTransform)

    /// A closure-based consumer; see `_ConsumerInterface`.
    case consumer(_ConsumerInterface)

    /// A closure-based consumer that also produces a value; see
    /// `_ConsumerValidatorInterface`.
    case consumerValidator(_ConsumerValidatorInterface)

    /// A closure-based predicate over a `Character`.
    // TODO: Would this just boil down to a consumer?
    case characterPredicate(_CharacterPredicateInterface)
  }
}
67+
68+
extension DSLTree {
  /// A custom character class: a list of members plus an inversion flag.
  struct CustomCharacterClass {
    /// The members making up the class.
    var members: [Member]

    /// Whether the class is inverted.
    var isInverted: Bool

    /// One member of a custom character class.
    enum Member {
      /// A single atom.
      case atom(Atom)

      /// A range given by its two endpoint atoms.
      case range(Atom, Atom)

      /// A nested custom character class.
      case custom(CustomCharacterClass)

      /// Set operations, each taking a left-hand and a right-hand class.
      indirect case intersection(
        CustomCharacterClass, CustomCharacterClass)
      indirect case subtraction(
        CustomCharacterClass, CustomCharacterClass)
      indirect case symmetricDifference(
        CustomCharacterClass, CustomCharacterClass)
    }
  }

  /// An atom of the DSL tree.
  enum Atom {
    /// A single `Character`.
    case char(Character)

    /// A single Unicode scalar.
    case scalar(Unicode.Scalar)

    /// An AST atom kept as-is because no DSL tree form was produced for it.
    case unconverted(AST.Atom)
  }
}
91+
92+
// CollectionConsumer
//
// A closure taking a string and a range of its indices and returning an
// optional index into the string.
// NOTE(review): presumably the returned index is where consumption ended
// and `nil` means "no match" — confirm against the callers.
typealias _ConsumerInterface = (String, Range<String.Index>) -> String.Index?
96+
97+
// Type producing consume
//
// A closure taking a string and a range of its indices and returning an
// optional triple of a value, a metatype and an index into the string.
// NOTE(review): presumably the metatype describes the produced value and
// the index is where consumption ended — confirm against the callers.
typealias _ConsumerValidatorInterface =
  (String, Range<String.Index>) -> (Any, Any.Type, String.Index)?
101+
102+
// Character-set (post grapheme segmentation)
//
// A closure deciding membership for a single `Character`.
typealias _CharacterPredicateInterface = (Character) -> Bool
106+
107+
/*
108+
109+
TODO: Use of syntactic types, like group kinds, is a
110+
little suspect. We may want to figure out a model here.
111+
112+
TODO: Do capturing groups need explicit numbers?
113+
114+
TODO: Is storing closures better or worse than using existentials?
115+
116+
*/
117+
118+
extension DSLTree.Node {
  /// The direct child nodes of this node.
  ///
  /// A `.convertedRegexLiteral` is transparent: it reports the children of
  /// the node it wraps. Cases that hold no child nodes report an empty
  /// array; this implementation never returns `nil`.
  var children: [DSLTree.Node]? {
    switch self {
    // Variadic cases: the stored array is the child list.
    case let .alternation(nodes), let .concatenation(nodes):
      return nodes

    // Cases holding exactly one child.
    case let .group(_, child),
         let .groupTransform(_, child, _),
         let .quantification(_, _, child):
      return [child]

    case let .conditional(_, trueBranch, falseBranch):
      return [trueBranch, falseBranch]

    case let .convertedRegexLiteral(converted, _):
      // Treat this transparently
      return converted.children

    // Cases holding no child nodes.
    case .atom, .customCharacterClass, .unconvertedRegexLiteral,
         .characterPredicate, .consumerValidator, .consumer,
         .regexLiteral, .stringLiteral, .empty, .trivia:
      return []
    }
  }
}
143+
144+
extension DSLTree.Node {
  /// The AST stored in this node, if it is one of the regex-literal cases
  /// (`.regexLiteral`, `.unconvertedRegexLiteral`,
  /// `.convertedRegexLiteral`); `nil` for every other case.
  var ast: AST? {
    switch self {
    case let .regexLiteral(stored),
         let .unconvertedRegexLiteral(stored),
         let .convertedRegexLiteral(_, stored):
      return stored
    default:
      return nil
    }
  }
}
154+
155+
extension AST {
  /// Converts an AST node to a `convertedRegexLiteral` node.
  ///
  /// The receiver is converted by the nested `convert()`, whose result is
  /// then wrapped together with `self` so the original AST stays available.
  /// Children are converted through their own `dslTreeNode`, so every
  /// converted child is itself a `convertedRegexLiteral`.
  var dslTreeNode: DSLTree.Node {
    /// Wraps `node` with the receiver's AST, unless it is already wrapped.
    func wrap(_ node: DSLTree.Node) -> DSLTree.Node {
      // A single-child concatenation converts to its child's
      // `dslTreeNode`, which is already a `convertedRegexLiteral`.
      // Return it unchanged: wrapping again would nest two wrappers, and
      // asserting here would trap in debug builds for valid input.
      if case .convertedRegexLiteral = node {
        return node
      }
      // TODO: For a single-child concatenation, should the result carry
      // the concatenation's AST rather than the child's?
      return .convertedRegexLiteral(node, self)
    }

    // Convert the top-level node without wrapping
    func convert() -> DSLTree.Node {
      switch self {
      case let .alternation(v):
        let children = v.children.map(\.dslTreeNode)
        return .alternation(children)

      case let .concatenation(v):
        // Coalesce adjacent children who can produce a
        // string literal representation
        let astChildren = v.children

        // Starting at `idx`, appends the `literalStringValue` of each
        // consecutive atom child and advances `idx` past them. Stops at
        // the first child that is not an atom or has no literal string
        // value. Returns `nil` when the accumulated string is empty.
        func coalesce(
          _ idx: inout Array<AST>.Index
        ) -> String? {
          var result = ""
          while idx < astChildren.endIndex {
            let atom: AST.Atom? = astChildren[idx].as()
            guard let str = atom?.literalStringValue else {
              break
            }
            result += str
            astChildren.formIndex(after: &idx)
          }
          return result.isEmpty ? nil : result
        }

        // No need to nest single children concatenations
        if astChildren.count == 1, let onlyChild = astChildren.first {
          return onlyChild.dslTreeNode
        }

        // Check for a single child post-coalescing
        var idx = astChildren.startIndex
        if let str = coalesce(&idx),
           idx == astChildren.endIndex
        {
          return .stringLiteral(str)
        }

        // Coalesce adjacent string children
        var curIdx = astChildren.startIndex
        var children: [DSLTree.Node] = []
        while curIdx < astChildren.endIndex {
          if let str = coalesce(&curIdx) {
            // TODO: Track source info...
            children.append(.stringLiteral(str))
          } else {
            children.append(astChildren[curIdx].dslTreeNode)
            // `curIdx` indexes `astChildren`, so advance it with that
            // collection rather than with the output array.
            astChildren.formIndex(after: &curIdx)
          }
        }
        return .concatenation(children)

      case let .group(v):
        let child = v.child.dslTreeNode
        return .group(v.kind.value, child)

      case let .conditional(v):
        let trueBranch = v.trueBranch.dslTreeNode
        let falseBranch = v.falseBranch.dslTreeNode
        return .conditional(
          v.condition.kind, trueBranch, falseBranch)

      case let .quantification(v):
        let child = v.child.dslTreeNode
        return .quantification(
          v.amount.value, v.kind.value, child)

      case let .quote(v):
        return .stringLiteral(v.literal)

      case let .trivia(v):
        return .trivia(v.contents)

      case let .atom(v):
        // Atoms with a literal string value become string literals;
        // everything else goes through the atom conversion.
        if let str = v.literalStringValue {
          return .stringLiteral(str)
        }
        return .atom(v.dslTreeAtom)

      case let .customCharacterClass(ccc):
        return .customCharacterClass(ccc.dslTreeClass)

      case .empty(_):
        return .empty

      case let .groupTransform(v, transform):
        let child = v.child.dslTreeNode
        return .groupTransform(
          v.kind.value, child, transform)
      }
    }

    return wrap(convert())
  }
}
267+
268+
extension AST.CustomCharacterClass {
  /// The DSL tree form of this custom character class.
  ///
  /// Every member is converted individually and the receiver's
  /// `isInverted` flag is carried over.
  var dslTreeClass: DSLTree.CustomCharacterClass {
    // TODO: Not quite 1-1
    func convert(
      _ member: Member
    ) -> DSLTree.CustomCharacterClass.Member {
      switch member {
      case let .atom(astAtom):
        return .atom(astAtom.dslTreeAtom)

      case let .range(astRange):
        let lower = astRange.lhs.dslTreeAtom
        let upper = astRange.rhs.dslTreeAtom
        return .range(lower, upper)

      case let .custom(nested):
        return .custom(nested.dslTreeClass)

      case let .quote(quoted):
        // TODO: Probably should flatten instead of nest
        // Each character of the quoted literal becomes its own atom
        // member of a nested, non-inverted class.
        let characters: [DSLTree.CustomCharacterClass.Member] =
          quoted.literal.map { .atom(.char($0)) }
        return .custom(DSLTree.CustomCharacterClass(
          members: characters, isInverted: false))

      case let .setOperation(lhsMembers, operation, rhsMembers):
        // Both operands become non-inverted classes of their converted
        // members.
        let left = DSLTree.CustomCharacterClass(
          members: lhsMembers.map(convert), isInverted: false)
        let right = DSLTree.CustomCharacterClass(
          members: rhsMembers.map(convert), isInverted: false)

        switch operation.value {
        case .intersection:
          return .intersection(left, right)
        case .subtraction:
          return .subtraction(left, right)
        case .symmetricDifference:
          return .symmetricDifference(left, right)
        }
      }
    }

    return DSLTree.CustomCharacterClass(
      members: members.map(convert), isInverted: isInverted)
  }
}
315+
316+
extension AST.Atom {
  /// The DSL tree form of this atom.
  ///
  /// Scalar and character atoms map to their dedicated cases; every other
  /// kind is carried along as `.unconverted`.
  var dslTreeAtom: DSLTree.Atom {
    switch kind {
    case let .scalar(scalar):
      return .scalar(scalar)
    case let .char(character):
      return .char(character)
    default:
      // TODO: More conversions
      return .unconverted(self)
    }
  }
}

0 commit comments

Comments
 (0)