Skip to content

Commit 9ccde19

Browse files
authored
Support obtaining captures by name on AnyRegexOutput (#300)
Resolves #266.
1 parent e1604a6 commit 9ccde19

File tree

8 files changed, with 46 additions and 11 deletions.

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -587,11 +587,11 @@ extension Compiler.ByteCodeGen {
587587
try emitConcatenationComponent(child)
588588
}
589589

590-
case let .capture(_, refId, child):
590+
case let .capture(name, refId, child):
591591
options.beginScope()
592592
defer { options.endScope() }
593593

594-
let cap = builder.makeCapture(id: refId)
594+
let cap = builder.makeCapture(id: refId, name: name)
595595
switch child {
596596
case let .matcher(_, m):
597597
emitMatcher(m, into: cap)

Sources/_StringProcessing/Engine/MEBuilder.swift

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@ extension MEProgram where Input.Element: Hashable {
4545
// Symbolic reference resolution
4646
var unresolvedReferences: [ReferenceID: [InstructionAddress]] = [:]
4747
var referencedCaptureOffsets: [ReferenceID: Int] = [:]
48+
var namedCaptureOffsets: [String: Int] = [:]
4849
var captureCount: Int {
4950
// We currently deduce the capture count from the capture register number.
5051
nextCaptureRegister.rawValue
@@ -353,7 +354,8 @@ extension MEProgram.Builder {
353354
staticMatcherFunctions: matcherFunctions,
354355
registerInfo: regInfo,
355356
captureStructure: captureStructure,
356-
referencedCaptureOffsets: referencedCaptureOffsets)
357+
referencedCaptureOffsets: referencedCaptureOffsets,
358+
namedCaptureOffsets: namedCaptureOffsets)
357359
}
358360

359361
mutating func reset() { self = Self() }
@@ -438,14 +440,20 @@ fileprivate extension MEProgram.Builder {
438440

439441
// Register helpers
440442
extension MEProgram.Builder {
441-
mutating func makeCapture(id: ReferenceID?) -> CaptureRegister {
443+
mutating func makeCapture(
444+
id: ReferenceID?, name: String?
445+
) -> CaptureRegister {
442446
defer { nextCaptureRegister.rawValue += 1 }
443447
// Register the capture for later lookup via symbolic references.
444448
if let id = id {
445449
let preexistingValue = referencedCaptureOffsets.updateValue(
446450
captureCount, forKey: id)
447451
assert(preexistingValue == nil)
448452
}
453+
if let name = name {
454+
// TODO: Reject duplicate capture names unless `(?J)`?
455+
namedCaptureOffsets.updateValue(captureCount, forKey: name)
456+
}
449457
return nextCaptureRegister
450458
}
451459

Sources/_StringProcessing/Engine/MECapture.swift

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -145,6 +145,7 @@ extension Processor._StoredCapture: CustomStringConvertible {
145145
struct CaptureList {
146146
var values: Array<Processor<String>._StoredCapture>
147147
var referencedCaptureOffsets: [ReferenceID: Int]
148+
var namedCaptureOffsets: [String: Int]
148149

149150
// func extract(from s: String) -> Array<Array<Substring>> {
150151
// caps.map { $0.map { s[$0] } }

Sources/_StringProcessing/Engine/MEProgram.swift

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ struct MEProgram<Input: Collection> where Input.Element: Equatable {
3636

3737
let captureStructure: CaptureStructure
3838
let referencedCaptureOffsets: [ReferenceID: Int]
39+
let namedCaptureOffsets: [String: Int]
3940
}
4041

4142
extension MEProgram: CustomStringConvertible {

Sources/_StringProcessing/Executor.swift

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,8 @@ struct Executor {
3737

3838
let capList = CaptureList(
3939
values: cpu.storedCaptures,
40-
referencedCaptureOffsets: engine.program.referencedCaptureOffsets)
40+
referencedCaptureOffsets: engine.program.referencedCaptureOffsets,
41+
namedCaptureOffsets: engine.program.namedCaptureOffsets)
4142

4243
let capStruct = engine.program.captureStructure
4344
let range = inputRange.lowerBound..<endIdx
@@ -62,6 +63,7 @@ struct Executor {
6263
range: range,
6364
rawCaptures: caps,
6465
referencedCaptureOffsets: capList.referencedCaptureOffsets,
66+
namedCaptureOffsets: capList.namedCaptureOffsets,
6567
value: value)
6668
}
6769

Sources/_StringProcessing/Regex/AnyRegexOutput.swift

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,12 +38,17 @@ extension Regex.Match where Output == AnyRegexOutput {
3838
) -> Substring {
3939
input[range]
4040
}
41+
42+
public subscript(name: String) -> AnyRegexOutput.Element? {
43+
namedCaptureOffsets[name].map { self[$0 + 1] }
44+
}
4145
}
4246

4347
/// A type-erased regex output
4448
@available(SwiftStdlib 5.7, *)
4549
public struct AnyRegexOutput {
4650
let input: String
51+
let namedCaptureOffsets: [String: Int]
4752
fileprivate let _elements: [ElementRepresentation]
4853

4954
/// The underlying representation of the element of a type-erased regex
@@ -94,9 +99,12 @@ extension AnyRegexOutput {
9499
@available(SwiftStdlib 5.7, *)
95100
extension AnyRegexOutput {
96101
internal init<C: Collection>(
97-
input: String, elements: C
102+
input: String, namedCaptureOffsets: [String: Int], elements: C
98103
) where C.Element == StructuredCapture {
99-
self.init(input: input, _elements: elements.map(ElementRepresentation.init))
104+
self.init(
105+
input: input,
106+
namedCaptureOffsets: namedCaptureOffsets,
107+
_elements: elements.map(ElementRepresentation.init))
100108
}
101109
}
102110

@@ -170,6 +178,13 @@ extension AnyRegexOutput: RandomAccessCollection {
170178
}
171179
}
172180

181+
@available(SwiftStdlib 5.7, *)
182+
extension AnyRegexOutput {
183+
public subscript(name: String) -> Element? {
184+
namedCaptureOffsets[name].map { self[$0 + 1] }
185+
}
186+
}
187+
173188
@available(SwiftStdlib 5.7, *)
174189
extension Regex.Match where Output == AnyRegexOutput {
175190
/// Creates a type-erased regex match from an existing match.

Sources/_StringProcessing/Regex/Match.swift

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,8 @@ extension Regex {
2626

2727
let referencedCaptureOffsets: [ReferenceID: Int]
2828

29+
let namedCaptureOffsets: [String: Int]
30+
2931
let value: Any?
3032
}
3133
}
@@ -40,6 +42,7 @@ extension Regex.Match {
4042
storedCapture: StoredCapture(range: range, value: nil))
4143
let output = AnyRegexOutput(
4244
input: input,
45+
namedCaptureOffsets: namedCaptureOffsets,
4346
elements: [wholeMatchAsCapture] + rawCaptures)
4447
return output as! Output
4548
} else if Output.self == Substring.self {

Tests/RegexBuilderTests/RegexDSLTests.swift

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -689,7 +689,9 @@ class RegexDSLTests: XCTestCase {
689689
}
690690
do {
691691
let regex = try Regex(
692-
compiling: #"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#)
692+
compiling: #"""
693+
(?<lower>[0-9A-F]+)(?:\.\.(?<upper>[0-9A-F]+))?\s+;\s+(?<desc>\w+).*
694+
"""#)
693695
let line = """
694696
A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM \
695697
COMBINING MARK TUKWENTIS
@@ -699,13 +701,16 @@ class RegexDSLTests: XCTestCase {
699701
let output = match.output
700702
XCTAssertEqual(output[0].substring, line[...])
701703
XCTAssertTrue(output[1].substring == "A6F0")
704+
XCTAssertTrue(output["lower"]?.substring == "A6F0")
702705
XCTAssertTrue(output[2].substring == "A6F1")
706+
XCTAssertTrue(output["upper"]?.substring == "A6F1")
703707
XCTAssertTrue(output[3].substring == "Extend")
708+
XCTAssertTrue(output["desc"]?.substring == "Extend")
704709
let typedOutput = try XCTUnwrap(output.as(
705-
(Substring, Substring, Substring?, Substring).self))
710+
(Substring, lower: Substring, upper: Substring?, Substring).self))
706711
XCTAssertEqual(typedOutput.0, line[...])
707-
XCTAssertTrue(typedOutput.1 == "A6F0")
708-
XCTAssertTrue(typedOutput.2 == "A6F1")
712+
XCTAssertTrue(typedOutput.lower == "A6F0")
713+
XCTAssertTrue(typedOutput.upper == "A6F1")
709714
XCTAssertTrue(typedOutput.3 == "Extend")
710715
}
711716
}

0 commit comments

Comments
 (0)