Skip to content

Commit d8ce370

Browse files
committed
Flatten optional nesting for regex literal captures
When computing the CaptureList for AST nodes, including converted AST -> DSL nodes, permit at most one level of optionality. This means that regex literal captures are now either `Substring` or `Substring?`. Optional nesting is, however, still performed in the DSL (due to result builder limitations). If a regex literal is nested in the DSL, it may add at most one extra level of optionality to the current nesting level.
1 parent 3bd6b8b commit d8ce370

File tree

4 files changed

+252
-123
lines changed

4 files changed

+252
-123
lines changed

Sources/_RegexParser/Regex/Parse/CaptureList.swift

Lines changed: 68 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -57,63 +57,105 @@ extension CaptureList {
5757
}
5858
}
5959

60+
extension CaptureList {
61+
/// Accumulates captures into `captures` as capture entries are appended to
/// it.
public struct Builder {
62+
/// The captures collected so far.
public var captures = CaptureList()
63+
64+
/// Creates a builder whose `captures` starts as `CaptureList()`.
public init() {}
65+
66+
/// Tracks the optional depth that applies at the current position, split
/// into an outer and an inner component.
public struct OptionalNesting {
67+
// We maintain two depths, inner and outer. These allow e.g. the nesting
68+
// of a regex literal in a DSL, where outside of the scope of the literal,
69+
// nesting is allowed, but inside the literal at most one extra layer of
70+
// optionality may be added.
71+
/// The depth recorded for the enclosing scope; `disablingNesting` stores
/// the current total `depth` here.
public var outerDepth: Int
72+
/// Whether `addingOptional` increments `innerDepth` (`true`) or pins it
/// to 1 (`false`).
public var canNest: Bool
73+
/// The depth added within the current scope; both initializers start it
/// at 0.
public var innerDepth: Int
74+
75+
/// Creates a nesting with the given outer depth and nesting permission,
/// and an inner depth of 0.
internal init(outerDepth: Int, canNest: Bool) {
76+
self.outerDepth = outerDepth
77+
self.canNest = canNest
78+
self.innerDepth = 0
79+
}
80+
81+
/// Creates a nesting with an outer depth of 0 and an inner depth of 0.
public init(canNest: Bool) {
82+
self.init(outerDepth: 0, canNest: canNest)
83+
}
84+
85+
/// The total optional depth: `outerDepth + innerDepth`.
public var depth: Int { outerDepth + innerDepth }
86+
87+
/// A nesting for an inner scope in which nesting is disabled. If nesting
/// is already disabled, `self` is returned unchanged; otherwise the
/// result has `outerDepth` equal to the current `depth`, `canNest` set to
/// `false`, and `innerDepth` reset to 0.
public var disablingNesting: Self {
88+
// If we are currently able to nest, store the current depth as the
89+
// outer depth, and disable nesting for an inner scope.
90+
guard canNest else { return self }
91+
return .init(outerDepth: depth, canNest: false)
92+
}
93+
94+
/// A copy of this nesting with one more layer of optionality requested.
/// When `canNest` is `true`, `innerDepth` grows by 1; when it is `false`,
/// `innerDepth` is set to 1, so repeated applications never exceed one
/// inner layer.
public var addingOptional: Self {
95+
var result = self
96+
result.innerDepth = canNest ? innerDepth + 1 : 1
97+
return result
98+
}
99+
}
100+
}
101+
}
102+
60103
// MARK: Generating from AST
61104

62-
extension AST.Node {
63-
public func _addCaptures(
64-
to list: inout CaptureList,
65-
optionalNesting nesting: Int
105+
extension CaptureList.Builder {
106+
public mutating func addCaptures(
107+
of node: AST.Node, optionalNesting nesting: OptionalNesting
66108
) {
67-
let addOptional = nesting+1
68-
switch self {
109+
switch node {
69110
case let .alternation(a):
70111
for child in a.children {
71-
child._addCaptures(to: &list, optionalNesting: addOptional)
112+
addCaptures(of: child, optionalNesting: nesting.addingOptional)
72113
}
73114

74115
case let .concatenation(c):
75116
for child in c.children {
76-
child._addCaptures(to: &list, optionalNesting: nesting)
117+
addCaptures(of: child, optionalNesting: nesting)
77118
}
78119

79120
case let .group(g):
80121
switch g.kind.value {
81122
case .capture:
82-
list.append(.init(optionalDepth: nesting, g.location))
123+
captures.append(.init(optionalDepth: nesting.depth, g.location))
83124

84125
case .namedCapture(let name):
85-
list.append(.init(name: name.value, optionalDepth: nesting, g.location))
126+
captures.append(.init(
127+
name: name.value, optionalDepth: nesting.depth, g.location))
86128

87129
case .balancedCapture(let b):
88-
list.append(.init(name: b.name?.value, optionalDepth: nesting,
89-
g.location))
130+
captures.append(.init(
131+
name: b.name?.value, optionalDepth: nesting.depth, g.location))
90132

91133
default: break
92134
}
93-
g.child._addCaptures(to: &list, optionalNesting: nesting)
135+
addCaptures(of: g.child, optionalNesting: nesting)
94136

95137
case .conditional(let c):
96138
switch c.condition.kind {
97139
case .group(let g):
98-
AST.Node.group(g)._addCaptures(to: &list, optionalNesting: nesting)
140+
addCaptures(of: .group(g), optionalNesting: nesting)
99141
default:
100142
break
101143
}
102144

103-
c.trueBranch._addCaptures(to: &list, optionalNesting: addOptional)
104-
c.falseBranch._addCaptures(to: &list, optionalNesting: addOptional)
145+
addCaptures(of: c.trueBranch, optionalNesting: nesting.addingOptional)
146+
addCaptures(of: c.falseBranch, optionalNesting: nesting.addingOptional)
105147

106148
case .quantification(let q):
107149
var optNesting = nesting
108150
if q.amount.value.bounds.atLeast == 0 {
109-
optNesting += 1
151+
optNesting = optNesting.addingOptional
110152
}
111-
q.child._addCaptures(to: &list, optionalNesting: optNesting)
153+
addCaptures(of: q.child, optionalNesting: optNesting)
112154

113155
case .absentFunction(let abs):
114156
switch abs.kind {
115157
case .expression(_, _, let child):
116-
child._addCaptures(to: &list, optionalNesting: nesting)
158+
addCaptures(of: child, optionalNesting: nesting)
117159
case .clearer, .repeater, .stopper:
118160
break
119161
}
@@ -122,16 +164,17 @@ extension AST.Node {
122164
break
123165
}
124166
}
167+
/// Builds the capture list for `ast`.
///
/// A fresh builder first appends a leading entry with optional depth 0 and
/// a `.fake` location, then adds the captures of `ast.root` starting from a
/// nesting created with `canNest: false`.
///
/// - Parameter ast: The AST whose root node is walked.
/// - Returns: The builder's accumulated `captures`.
public static func build(_ ast: AST) -> CaptureList {
168+
var builder = Self()
169+
// Leading entry appended before any capture found in the tree.
builder.captures.append(.init(optionalDepth: 0, .fake))
170+
builder.addCaptures(of: ast.root, optionalNesting: .init(canNest: false))
171+
return builder.captures
172+
}
125173
}
126174

127175
extension AST {
128176
/// The capture list (including the whole match) of this AST.
129-
public var captureList: CaptureList {
130-
var caps = CaptureList()
131-
caps.append(.init(optionalDepth: 0, .fake))
132-
root._addCaptures(to: &caps, optionalNesting: 0)
133-
return caps
134-
}
177+
public var captureList: CaptureList { .Builder.build(self) }
135178
}
136179

137180
// MARK: Convenience for testing and inspection

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -545,68 +545,62 @@ struct CaptureTransform: Hashable, CustomStringConvertible {
545545
}
546546
}
547547

548-
// MARK: AST wrapper types
549-
//
550-
// These wrapper types are required because even @_spi-marked public APIs can't
551-
// include symbols from implementation-only dependencies.
552-
553-
extension DSLTree.Node {
554-
func _addCaptures(
555-
to list: inout CaptureList,
556-
optionalNesting nesting: Int
548+
extension CaptureList.Builder {
549+
mutating func addCaptures(
550+
of node: DSLTree.Node, optionalNesting nesting: OptionalNesting
557551
) {
558-
let addOptional = nesting+1
559-
switch self {
552+
switch node {
560553
case let .orderedChoice(children):
561554
for child in children {
562-
child._addCaptures(to: &list, optionalNesting: addOptional)
555+
addCaptures(of: child, optionalNesting: nesting.addingOptional)
563556
}
564557

565558
case let .concatenation(children):
566559
for child in children {
567-
child._addCaptures(to: &list, optionalNesting: nesting)
560+
addCaptures(of: child, optionalNesting: nesting)
568561
}
569562

570563
case let .capture(name, _, child, transform):
571-
list.append(.init(
564+
captures.append(.init(
572565
name: name,
573566
type: transform?.resultType ?? child.wholeMatchType,
574-
optionalDepth: nesting, .fake))
575-
child._addCaptures(to: &list, optionalNesting: nesting)
567+
optionalDepth: nesting.depth, .fake))
568+
addCaptures(of: child, optionalNesting: nesting)
576569

577570
case let .nonCapturingGroup(kind, child):
578571
assert(!kind.ast.isCapturing)
579-
child._addCaptures(to: &list, optionalNesting: nesting)
572+
addCaptures(of: child, optionalNesting: nesting)
580573

581574
case let .conditional(cond, trueBranch, falseBranch):
582575
switch cond.ast {
583576
case .group(let g):
584-
AST.Node.group(g)._addCaptures(to: &list, optionalNesting: nesting)
577+
addCaptures(of: .group(g), optionalNesting: nesting)
585578
default:
586579
break
587580
}
588581

589-
trueBranch._addCaptures(to: &list, optionalNesting: addOptional)
590-
falseBranch._addCaptures(to: &list, optionalNesting: addOptional)
591-
582+
addCaptures(of: trueBranch, optionalNesting: nesting.addingOptional)
583+
addCaptures(of: falseBranch, optionalNesting: nesting.addingOptional)
592584

593585
case let .quantification(amount, _, child):
594586
var optNesting = nesting
595587
if amount.ast.bounds.atLeast == 0 {
596-
optNesting += 1
588+
optNesting = optNesting.addingOptional
597589
}
598-
child._addCaptures(to: &list, optionalNesting: optNesting)
590+
addCaptures(of: child, optionalNesting: optNesting)
599591

600592
case let .absentFunction(abs):
601593
switch abs.ast.kind {
602594
case .expression(_, _, let child):
603-
child._addCaptures(to: &list, optionalNesting: nesting)
595+
addCaptures(of: child, optionalNesting: nesting)
604596
case .clearer, .repeater, .stopper:
605597
break
606598
}
607599

608600
case let .convertedRegexLiteral(n, _):
609-
return n._addCaptures(to: &list, optionalNesting: nesting)
601+
// We disable nesting for converted AST trees, as literals do not nest
602+
// captures. This includes literals nested in a DSL.
603+
return addCaptures(of: n, optionalNesting: nesting.disablingNesting)
610604

611605
case .matcher:
612606
break
@@ -617,6 +611,16 @@ extension DSLTree.Node {
617611
}
618612
}
619613

614+
/// Builds the capture list for `dsl`.
///
/// A fresh builder first appends a leading entry typed as
/// `dsl.root.wholeMatchType`, with optional depth 0 and a `.fake` location,
/// then adds the captures of `dsl.root` starting from a nesting created
/// with `canNest: true`.
///
/// - Parameter dsl: The DSL tree whose root node is walked.
/// - Returns: The builder's accumulated `captures`.
static func build(_ dsl: DSLTree) -> CaptureList {
615+
var builder = Self()
616+
// Leading entry appended before any capture found in the tree.
builder.captures.append(
617+
.init(type: dsl.root.wholeMatchType, optionalDepth: 0, .fake))
618+
builder.addCaptures(of: dsl.root, optionalNesting: .init(canNest: true))
619+
return builder.captures
620+
}
621+
}
622+
623+
extension DSLTree.Node {
620624
/// Returns true if the node is output-forwarding, i.e. not defining its own
621625
/// output but forwarding its only child's output.
622626
var isOutputForwarding: Bool {
@@ -651,13 +655,13 @@ extension DSLTree.Node {
651655
}
652656
}
653657

658+
// MARK: AST wrapper types
659+
//
660+
// These wrapper types are required because even @_spi-marked public APIs can't
661+
// include symbols from implementation-only dependencies.
662+
654663
extension DSLTree {
655-
var captureList: CaptureList {
656-
var list = CaptureList()
657-
list.append(.init(type: root.wholeMatchType, optionalDepth: 0, .fake))
658-
root._addCaptures(to: &list, optionalNesting: 0)
659-
return list
660-
}
664+
var captureList: CaptureList { .Builder.build(self) }
661665

662666
/// Presents a wrapped version of `DSLTree.Node` that can provide an internal
663667
/// `_TreeNode` conformance.

Tests/RegexBuilderTests/RegexDSLTests.swift

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1260,6 +1260,88 @@ class RegexDSLTests: XCTestCase {
12601260
}
12611261

12621262
func testOptionalNesting() throws {
1263+
try _testDSLCaptures(
1264+
("a", ("a", nil)),
1265+
("", ("", nil)),
1266+
("b", ("b", "b")),
1267+
("bb", ("bb", "b")),
1268+
matchType: (Substring, Substring?).self, ==)
1269+
{
1270+
try! Regex("(?:a|(b)*)?", as: (Substring, Substring?).self)
1271+
}
1272+
1273+
try _testDSLCaptures(
1274+
("a", ("a", nil)),
1275+
("", ("", nil)),
1276+
("b", ("b", "b")),
1277+
("bb", ("bb", "b")),
1278+
matchType: (Substring, Substring??).self, ==)
1279+
{
1280+
Optionally {
1281+
try! Regex("a|(b)*", as: (Substring, Substring?).self)
1282+
}
1283+
}
1284+
1285+
try _testDSLCaptures(
1286+
("a", ("a", nil)),
1287+
("", ("", nil)),
1288+
("b", ("b", "b")),
1289+
("bb", ("bb", "b")),
1290+
matchType: (Substring, Substring???).self, ==)
1291+
{
1292+
Optionally {
1293+
ChoiceOf {
1294+
try! Regex("a", as: Substring.self)
1295+
try! Regex("(b)*", as: (Substring, Substring?).self)
1296+
}
1297+
}
1298+
}
1299+
1300+
try _testDSLCaptures(
1301+
("a", ("a", nil)),
1302+
("", ("", nil)),
1303+
("b", ("b", "b")),
1304+
("bb", ("bb", "b")),
1305+
matchType: (Substring, Substring??).self, ==)
1306+
{
1307+
ChoiceOf {
1308+
try! Regex("a", as: Substring.self)
1309+
try! Regex("(b)*", as: (Substring, Substring?).self)
1310+
}
1311+
}
1312+
1313+
try _testDSLCaptures(
1314+
("a", ("a", nil)),
1315+
("", ("", nil)),
1316+
("b", ("b", "b")),
1317+
("bb", ("bb", "b")),
1318+
matchType: (Substring, Substring??).self, ==)
1319+
{
1320+
ChoiceOf {
1321+
try! Regex("a", as: Substring.self)
1322+
ZeroOrMore {
1323+
try! Regex("(b)", as: (Substring, Substring).self)
1324+
}
1325+
}
1326+
}
1327+
1328+
try _testDSLCaptures(
1329+
("a", ("a", nil)),
1330+
("", ("", nil)),
1331+
("b", ("b", "b")),
1332+
("bb", ("bb", "b")),
1333+
matchType: (Substring, Substring??).self, ==)
1334+
{
1335+
ChoiceOf {
1336+
try! Regex("a", as: Substring.self)
1337+
ZeroOrMore {
1338+
Capture {
1339+
try! Regex("b", as: Substring.self)
1340+
}
1341+
}
1342+
}
1343+
}
1344+
12631345
let r = Regex {
12641346
Optionally {
12651347
Optionally {

0 commit comments

Comments
 (0)