Skip to content

Commit 4ee95a0

Browse files
committed
DSLTree hooked up to legacy VMs
1 parent c650aaf commit 4ee95a0

File tree

4 files changed

+143
-34
lines changed

4 files changed

+143
-34
lines changed

Sources/_StringProcessing/CharacterClass.swift

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,22 @@ extension AST.Node {
323323
}
324324
}
325325

326+
extension DSLTree.Node {
  /// The model character class this node corresponds to, if any.
  ///
  /// Custom character class nodes yield their `modelCharacterClass` and
  /// atom nodes yield the atom's `characterClass`; both may be `nil`.
  /// Every other kind of node yields `nil`.
  var characterClass: CharacterClass? {
    if case let .customCharacterClass(custom) = self {
      return custom.modelCharacterClass
    }
    if case let .atom(atom) = self {
      return atom.characterClass
    }
    // FIXME: Do we make one from `.characterPredicate`? For now it is
    // treated like every other node kind and produces nothing.
    return nil
  }
}
341+
326342
extension CharacterClass {
327343
public func withMatchLevel(
328344
_ level: CharacterClass.MatchLevel
@@ -333,6 +349,17 @@ extension CharacterClass {
333349
}
334350
}
335351

352+
extension DSLTree.Atom {
  /// The model character class for this atom, if any.
  ///
  /// Only `.unconverted` atoms can produce one, by deferring to the
  /// wrapped atom's own `characterClass`. All other atoms yield `nil`.
  var characterClass: CharacterClass? {
    guard case let .unconverted(wrapped) = self else {
      return nil
    }
    return wrapped.characterClass
  }
}
362+
336363
extension AST.Atom {
337364
var characterClass: CharacterClass? {
338365
switch kind {
@@ -389,6 +416,74 @@ extension AST.Atom.EscapedBuiltin {
389416
}
390417
}
391418

419+
extension DSLTree.CustomCharacterClass {
  // TODO: Refactor a bit, and... can we drop this type?

  /// The model `CharacterClass` built from this custom character class, or
  /// `nil` if any member cannot be expressed as a set component.
  ///
  /// Each member is lowered to one `CharacterClass.CharacterSetComponent`:
  /// - atoms become a nested character class if they have one, otherwise a
  ///   single literal character;
  /// - ranges require both bounds to have a literal character value;
  /// - nested custom classes and the operands of set operations are lowered
  ///   recursively through `modelCharacterClass`.
  ///
  /// The resulting `.custom` class is inverted when `isInverted` is true.
  var modelCharacterClass: CharacterClass? {
    var components: [CharacterClass.CharacterSetComponent] = []
    for member in members {
      switch member {
      case let .atom(atom):
        // Prefer a full character class; fall back to a literal character.
        if let cc = atom.characterClass {
          components.append(.characterClass(cc))
        } else if let c = atom.literalCharacterValue {
          components.append(.character(c))
        } else {
          return nil
        }

      case let .range(low, high):
        guard let lhs = low.literalCharacterValue,
              let rhs = high.literalCharacterValue
        else {
          return nil
        }
        // NOTE(review): `lhs...rhs` traps if lhs > rhs; presumably the
        // bounds are validated before reaching here -- confirm.
        components.append(.range(lhs...rhs))

      case let .custom(nested):
        guard let cc = nested.modelCharacterClass else {
          return nil
        }
        components.append(.characterClass(cc))

      case let .intersection(lhs, rhs):
        guard let operands = Self.setOperands(lhs, rhs) else {
          return nil
        }
        components.append(.setOperation(
          lhs: operands.lhs,
          op: .intersection,
          rhs: operands.rhs))

      case let .subtraction(lhs, rhs):
        guard let operands = Self.setOperands(lhs, rhs) else {
          return nil
        }
        components.append(.setOperation(
          lhs: operands.lhs,
          op: .subtraction,
          rhs: operands.rhs))

      case let .symmetricDifference(lhs, rhs):
        guard let operands = Self.setOperands(lhs, rhs) else {
          return nil
        }
        components.append(.setOperation(
          lhs: operands.lhs,
          op: .symmetricDifference,
          rhs: operands.rhs))
      }
    }
    let cc = CharacterClass.custom(components)
    return isInverted ? cc.inverted : cc
  }

  /// Lowers both operands of a set operation to `.characterClass`
  /// components, or returns `nil` if either operand has no model
  /// character class. The left operand is resolved before the right.
  ///
  /// NOTE(review): assumes set-operation members carry
  /// `DSLTree.CustomCharacterClass` operands -- confirm against the
  /// `Member` declaration.
  private static func setOperands(
    _ lhs: DSLTree.CustomCharacterClass,
    _ rhs: DSLTree.CustomCharacterClass
  ) -> (
    lhs: CharacterClass.CharacterSetComponent,
    rhs: CharacterClass.CharacterSetComponent
  )? {
    guard let lhsClass = lhs.modelCharacterClass,
          let rhsClass = rhs.modelCharacterClass
    else {
      return nil
    }
    return (lhs: .characterClass(lhsClass), rhs: .characterClass(rhsClass))
  }
}
486+
392487
extension AST.CustomCharacterClass {
393488
/// The model character class for this custom character class.
394489
var modelCharacterClass: CharacterClass? {

Sources/_StringProcessing/Legacy/LegacyCompile.swift

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,20 @@ import _MatchingEngine
1313

1414
/// Compiles a regex `AST` into `RECode`.
///
/// This is a convenience entry point: the AST is converted to its
/// `DSLTree` form and handed to the `DSLTree` overload of `compile`,
/// passing `options` through unchanged.
///
/// - Throws: Whatever the `DSLTree` overload throws.
func compile(
  _ ast: AST, options: REOptions = .none
) throws -> RECode {
  let tree = ast.dslTree
  return try compile(tree, options: options)
}
19+
20+
func compile(
21+
_ ast: DSLTree, options: REOptions = .none
1622
) throws -> RECode {
1723
var currentLabel = 0
1824
func createLabel() -> RECode.Instruction {
1925
defer { currentLabel += 1}
2026
return .label(currentLabel)
2127
}
2228
var instructions = RECode.InstructionList()
23-
func compileNode(_ ast: AST.Node) throws {
24-
29+
func compileNode(_ ast: DSLTree.Node) throws {
2530
if let cc = ast.characterClass {
2631
instructions.append(.characterClass(cc))
2732
return
@@ -30,12 +35,12 @@ func compile(
3035
switch ast {
3136
case .trivia, .empty: return
3237

33-
case .quote(let s):
34-
s.literal.forEach { instructions.append(.character($0)) }
38+
case let .quotedLiteral(s):
39+
s.forEach { instructions.append(.character($0)) }
3540
return
3641

37-
case .atom(let a):
38-
switch a.kind {
42+
case let .atom(a):
43+
switch a {
3944
case .char(let c):
4045
instructions.append(.character(c))
4146
return
@@ -46,37 +51,36 @@ func compile(
4651
instructions.append(.any)
4752
return
4853
default:
49-
throw unsupported("Unsupported: \(a._dumpBase)")
54+
throw unsupported("Unsupported: \(a)")
5055
}
5156

52-
case .group(let g):
53-
switch g.kind.value {
57+
case let .group(kind, child):
58+
switch kind {
5459
case .nonCapture:
5560
instructions.append(.beginGroup)
56-
try compileNode(g.child)
61+
try compileNode(child)
5762
instructions.append(.endGroup)
5863
return
5964
case .capture:
6065
instructions.append(.beginCapture)
61-
try compileNode(g.child)
66+
try compileNode(child)
6267
instructions.append(.endCapture())
6368
return
6469

6570
default:
66-
throw unsupported("Unsupported group \(g.kind.value) \(g)")
71+
throw unsupported("Unsupported group \(kind)")
6772
}
6873

69-
case let .groupTransform(g, transform: t) where g.kind.value == .capture:
74+
case let .groupTransform(kind, child, transform) where kind == .capture:
7075
instructions.append(.beginCapture)
71-
try compileNode(g.child)
72-
instructions.append(.endCapture(transform: t))
76+
try compileNode(child)
77+
instructions.append(.endCapture(transform: transform))
7378
return
7479

75-
case .groupTransform(let g, _):
76-
throw unsupported("Unsupported group \(g)")
80+
case let .groupTransform(kind, _, _):
81+
throw unsupported("Unsupported group transform \(kind)")
7782

78-
case .concatenation(let concat):
79-
let children = concat.children
83+
case let .concatenation(children):
8084
let childrenHaveCaptures = children.any(\.hasCapture)
8185
if childrenHaveCaptures {
8286
instructions.append(.beginGroup)
@@ -87,9 +91,8 @@ func compile(
8791
}
8892
return
8993

90-
case .quantification(let quant):
91-
let child = quant.child
92-
switch (quant.amount.value, quant.kind.value) {
94+
case let .quantification(amount, kind, child):
95+
switch (amount, kind) {
9396
case (.zeroOrMore, .eager):
9497
// a* ==> L_START, <split L_DONE>, a, goto L_START, L_DONE
9598
let childHasCaptures = child.hasCapture
@@ -221,10 +224,10 @@ func compile(
221224
}
222225
return
223226
default:
224-
throw unsupported("Unsupported: \(quant._dumpBase)")
227+
throw unsupported("Unsupported: \((amount, kind))")
225228
}
226229

227-
case .alternation(let alt):
230+
case let .alternation(children):
228231
// a|b ==> <split L_B>, a, goto L_DONE, L_B, b, L_DONE
229232
// a|b|c ==> <split L_B>, a, goto L_DONE,
230233
// L_B, <split L_C>, b, goto L_DONE, L_C, c, L_DONE
@@ -237,7 +240,6 @@ func compile(
237240
// E.g. `a` falls-through to the rest of the program and the
238241
// other cases branch back.
239242
//
240-
let children = alt.children
241243
assert(!children.isEmpty)
242244
guard children.count > 1 else {
243245
return try compileNode(children[0])
@@ -258,16 +260,25 @@ func compile(
258260
return
259261

260262
case .conditional:
261-
throw unsupported(ast.renderAsCanonical())
263+
throw unsupported("Conditionals")
262264

263265
case .absentFunction:
264-
throw unsupported(ast.renderAsCanonical())
266+
throw unsupported("Absent functions")
265267

266268
case .customCharacterClass:
267269
fatalError("unreachable")
268270

269-
case .atom(let a) where a.characterClass != nil:
271+
case let .atom(a) where a.characterClass != nil:
270272
fatalError("unreachable")
273+
274+
case let .convertedRegexLiteral(node, _):
275+
try compileNode(node)
276+
277+
case .characterPredicate, .consumer, .consumerValidator:
278+
throw unsupported("DSL extensions")
279+
280+
case let .regexLiteral(re):
281+
try compileNode(re.dslTreeNode)
271282
}
272283
}
273284

Sources/_StringProcessing/RegexDSL/Core.swift

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,7 @@ public struct Regex<Match: MatchProtocol>: RegexProtocol {
6464
/// The legacy `RECode` for execution with a legacy VM.
6565
lazy private(set) var legacyLoweredProgram: RECode = {
6666
do {
67-
guard let ast = tree.ast else {
68-
throw "Extended support unavailable in legacy VM"
69-
}
70-
return try compile(ast)
67+
return try compile(tree)
7168
} catch {
7269
fatalError("Regex engine internal error: \(String(describing: error))")
7370
}
@@ -182,6 +179,7 @@ extension RegexProtocol {
182179
}
183180
return RegexMatch(range: range, match: convertedMatch)
184181
}
182+
185183
let executor = Executor(program: regex.program.loweredProgram)
186184
guard let result = executor.execute(
187185
input: input, in: inputRange, mode: mode

Sources/_StringProcessing/RegexDSL/DSLTree.swift

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -417,9 +417,14 @@ extension DSLTree {
417417
extension DSLTree.Node {
418418
var hasCapture: Bool {
419419
switch self {
420-
case .group(let k, _) where k.isCapturing,
421-
.groupTransform(let k, _, _) where k.isCapturing:
420+
case let .group(k, _) where k.isCapturing,
421+
let .groupTransform(k, _, _) where k.isCapturing:
422422
return true
423+
case let .convertedRegexLiteral(n, re):
424+
assert(n.hasCapture == re.hasCapture)
425+
return n.hasCapture
426+
case let .regexLiteral(re):
427+
return re.hasCapture
423428
default:
424429
break
425430
}

0 commit comments

Comments
 (0)