Skip to content

Commit 0df23fa

Browse files
committed
Validate capture lists
Begin storing source location on capture lists, and start erroring on duplicate named captures.
1 parent 0b18557 commit 0df23fa

File tree

5 files changed

+48
-17
lines changed

5 files changed

+48
-17
lines changed

Sources/_RegexParser/Regex/Parse/CaptureList.swift

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,18 @@ extension CaptureList {
2626
public var name: String?
2727
public var type: Any.Type?
2828
public var optionalDepth: Int
29+
public var location: SourceLocation
2930

3031
public init(
3132
name: String? = nil,
3233
type: Any.Type? = nil,
33-
optionalDepth: Int
34+
optionalDepth: Int,
35+
_ location: SourceLocation
3436
) {
3537
self.name = name
3638
self.type = type
3739
self.optionalDepth = optionalDepth
40+
self.location = location
3841
}
3942
}
4043
}
@@ -61,13 +64,14 @@ extension AST.Node {
6164
case let .group(g):
6265
switch g.kind.value {
6366
case .capture:
64-
list.append(.init(optionalDepth: nesting))
67+
list.append(.init(optionalDepth: nesting, g.location))
6568

6669
case .namedCapture(let name):
67-
list.append(.init(name: name.value, optionalDepth: nesting))
70+
list.append(.init(name: name.value, optionalDepth: nesting, g.location))
6871

6972
case .balancedCapture(let b):
70-
list.append(.init(name: b.name?.value, optionalDepth: nesting))
73+
list.append(.init(name: b.name?.value, optionalDepth: nesting,
74+
g.location))
7175

7276
default: break
7377
}
@@ -124,7 +128,8 @@ extension CaptureList.Capture: Equatable {
124128
public static func == (lhs: Self, rhs: Self) -> Bool {
125129
lhs.name == rhs.name &&
126130
lhs.optionalDepth == rhs.optionalDepth &&
127-
lhs.type == rhs.type
131+
lhs.type == rhs.type &&
132+
lhs.location == rhs.location
128133
}
129134
}
130135
extension CaptureList: Equatable {}

Sources/_RegexParser/Regex/Parse/Sema.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ extension RegexValidator {
3434
for opt in ast.globalOptions?.options ?? [] {
3535
try validateGlobalMatchingOption(opt)
3636
}
37+
try validateCaptures()
3738
try validateNode(ast.root)
3839
}
3940

@@ -59,6 +60,17 @@ extension RegexValidator {
5960
}
6061
}
6162

63+
func validateCaptures() throws {
64+
// TODO: Should this be validated when creating the capture list?
65+
var usedNames = Set<String>()
66+
for capture in captures.captures {
67+
guard let name = capture.name else { continue }
68+
guard usedNames.insert(name).inserted else {
69+
throw error(.duplicateNamedCapture(name), at: capture.location)
70+
}
71+
}
72+
}
73+
6274
func validateReference(_ ref: AST.Reference) throws {
6375
switch ref.kind {
6476
case .absolute(let i):

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ extension DSLTree.Node {
472472
list.append(.init(
473473
name: name,
474474
type: child.valueCaptureType?.base,
475-
optionalDepth: nesting))
475+
optionalDepth: nesting, .fake))
476476
child._addCaptures(to: &list, optionalNesting: nesting)
477477

478478
case let .nonCapturingGroup(kind, child):

Tests/RegexTests/CaptureTests.swift

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,36 +16,44 @@ import XCTest
1616

1717
extension CaptureList.Capture {
1818
static var cap: Self {
19-
return Self(optionalDepth: 0)
19+
return Self(optionalDepth: 0, .fake)
2020
}
2121

2222
static var opt: Self {
23-
return Self(optionalDepth: 1)
23+
return Self(optionalDepth: 1, .fake)
2424
}
2525
static var opt_opt: Self {
26-
return Self(optionalDepth: 2)
26+
return Self(optionalDepth: 2, .fake)
2727
}
2828
static var opt_opt_opt: Self {
29-
return Self(optionalDepth: 3)
29+
return Self(optionalDepth: 3, .fake)
3030
}
3131
static var opt_opt_opt_opt: Self {
32-
return Self(optionalDepth: 4)
32+
return Self(optionalDepth: 4, .fake)
3333
}
3434
static var opt_opt_opt_opt_opt: Self {
35-
return Self(optionalDepth: 5)
35+
return Self(optionalDepth: 5, .fake)
3636
}
3737
static var opt_opt_opt_opt_opt_opt: Self {
38-
return Self(optionalDepth: 6)
38+
return Self(optionalDepth: 6, .fake)
3939
}
4040

4141
static func named(_ name: String, opt: Int = 0) -> Self {
42-
return Self(name: name, optionalDepth: opt)
42+
return Self(name: name, optionalDepth: opt, .fake)
4343
}
4444
}
4545
extension CaptureList {
4646
static func caps(count: Int) -> Self {
4747
Self(Array(repeating: .cap, count: count))
4848
}
49+
50+
var withoutLocs: Self {
51+
var copy = self
52+
for idx in copy.captures.indices {
53+
copy.captures[idx].location = .fake
54+
}
55+
return copy
56+
}
4957
}
5058

5159
extension StructuredCapture {
@@ -151,7 +159,7 @@ func captureTest(
151159
line: UInt = #line
152160
) {
153161
let ast = try! parse(regex, .semantic, .traditional)
154-
let capList = ast.root._captureList
162+
let capList = ast.root._captureList.withoutLocs
155163
guard capList == expected else {
156164
XCTFail("""
157165
Expected:

Tests/RegexTests/ParseTests.swift

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ func parseTest(
9494
file: file, line: line)
9595
return
9696
}
97-
let captures = ast.captureList
97+
let captures = ast.captureList.withoutLocs
9898
guard captures == expectedCaptures else {
9999
XCTFail("""
100100
@@ -872,7 +872,7 @@ extension RegexTests {
872872
parseTest(
873873
"(?|(?<x>a)|(?<x>b))",
874874
nonCaptureReset(alt(namedCapture("x", "a"), namedCapture("x", "b"))),
875-
throwsError: .unsupported, captures: [.named("x", opt: 1), .named("x", opt: 1)]
875+
throwsError: .invalid, captures: [.named("x", opt: 1), .named("x", opt: 1)]
876876
)
877877

878878
// TODO: Reject mismatched names?
@@ -2532,6 +2532,12 @@ extension RegexTests {
25322532

25332533
diagnosticTest("(?x)(? : )", .unknownGroupKind("? "))
25342534

2535+
diagnosticTest("(?<x>)(?<x>)", .duplicateNamedCapture("x"))
2536+
diagnosticTest("(?<x>)|(?<x>)", .duplicateNamedCapture("x"))
2537+
diagnosticTest("((?<x>))(?<x>)", .duplicateNamedCapture("x"))
2538+
diagnosticTest("(|(?<x>))(?<x>)", .duplicateNamedCapture("x"))
2539+
diagnosticTest("(?<x>)(?<y>)(?<x>)", .duplicateNamedCapture("x"))
2540+
25352541
// MARK: Quantifiers
25362542

25372543
diagnosticTest("*", .quantifierRequiresOperand("*"))

0 commit comments

Comments
 (0)