Skip to content

Commit a877e24

Browse files
committed
Obtain match output elements without materializing the output.
Resolves #267.
1 parent 32afc43 commit a877e24

File tree

4 files changed

+108
-11
lines changed

4 files changed

+108
-11
lines changed

Sources/_RegexParser/Utility/TypeConstruction.swift

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,3 +139,62 @@ public enum TypeConstruction {
139139
return _openExistential(childType, do: helper)
140140
}
141141
}
142+
143+
extension TypeConstruction {
  /// Returns the metatype obtained by wrapping `base` in `depth` levels of
  /// `Optional`.
  ///
  /// For example, a depth of `2` applied to `Int.self` yields `Int??.self`.
  ///
  /// - Parameters:
  ///   - base: The type to wrap.
  ///   - depth: The number of `Optional` layers to add. Must not be negative.
  ///     Defaults to `1`.
  /// - Returns: `base` itself when `depth` is `0`; otherwise `base` wrapped in
  ///   `depth` optionals.
  public static func optionalType<Base>(
    of base: Base.Type, depth: Int = 1
  ) -> Any.Type {
    // A negative depth would otherwise fall into the `default` case and
    // recurse with an ever-decreasing depth until the stack overflows.
    precondition(depth >= 0, "Optional depth must be non-negative")
    switch depth {
    case 0: return base
    case 1: return Base?.self
    case 2: return Base??.self
    case 3: return Base???.self
    case 4: return Base????.self
    default:
      // Each optional layer has to be spelled statically, so deeper nestings
      // are built by peeling off four layers per recursive step.
      return optionalType(of: Base????.self, depth: depth - 4)
    }
  }
}
158+
159+
extension MemoryLayout {
  /// Returns the element index that corresponds to the given tuple element key
  /// path.
  ///
  /// The index is recovered by replaying the tuple layout: each element is
  /// placed at the current offset rounded up to its alignment, and the first
  /// element whose offset equals the key path's byte offset is reported.
  ///
  /// - Parameters:
  ///   - keyPath: The key path from a tuple to one of its elements.
  ///   - elementTypes: The element types of the tuple type, in order.
  /// - Returns: The index of the tuple element that `keyPath` refers to, or
  ///   `nil` if the key path has no byte offset, if a zero-sized element is
  ///   encountered before a match is found, or if no element starts at that
  ///   offset.
  // TODO: It's possible to get element types from the type metadata, but it's
  // more efficient to pass them in since we already know them in the matching
  // engine.
  public static func tupleElementIndex<ElementTypes: Collection>(
    of keyPath: PartialKeyPath<T>,
    elementTypes: ElementTypes
  ) -> Int? where ElementTypes.Element == Any.Type {
    guard let byteOffset = offset(of: keyPath) else {
      return nil
    }

    // Opens an element metatype to read its size and alignment mask.
    func sizeAndAlignMask<Element>(_: Element.Type) -> (Int, Int) {
      (MemoryLayout<Element>.size, MemoryLayout<Element>.alignment - 1)
    }

    var currentOffset = 0
    for (index, elementType) in elementTypes.enumerated() {
      let (size, alignMask) = _openExistential(
        elementType, do: sizeAndAlignMask)
      // The ABI of an offset-based key path only stores the byte offset, so
      // this doesn't work if there's a 0-sized element, e.g. `Void`,
      // `(Void, Void)`. (rdar://63819465)
      // A zero-sized element shares its offset with the element that follows
      // it, so the offset no longer identifies a unique index. This check has
      // to come before the offset comparison, including for offset 0.
      if size == 0 {
        return nil
      }
      // Align up the offset for this type.
      currentOffset = (currentOffset + alignMask) & ~alignMask
      // If it matches the offset we are looking for, `index` is the tuple
      // element index.
      if currentOffset == byteOffset {
        return index
      }
      // Advance to the past-the-end offset for this element.
      currentOffset += size
    }
    return nil
  }
}

Sources/_StringProcessing/Regex/AnyRegexOutput.swift

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,11 @@ extension AnyRegexOutput: RandomAccessCollection {
6262

6363
/// The captured value, `nil` for no-capture
6464
public var value: Any? {
65-
// FIXME: Should this return the substring for default-typed
66-
// values?
67-
representation.value
65+
representation.value ?? substring
66+
}
67+
68+
/// The type of this capture, as reported by its underlying representation.
internal var type: Any.Type {
  let elementType = representation.type
  return elementType
}
6971

7072
/// The name of this capture, if it has one, otherwise `nil`.
@@ -263,4 +265,9 @@ extension AnyRegexOutput.ElementRepresentation {
263265
optionalCount: optionalDepth
264266
)
265267
}
268+
269+
/// The type of this element: the dynamic type of `value` when one is
/// present, otherwise `Substring` wrapped in `optionalDepth` optionals.
var type: Any.Type {
  if let value {
    return Swift.type(of: value)
  }
  return TypeConstruction.optionalType(
    of: Substring.self, depth: optionalDepth)
}
266273
}

Sources/_StringProcessing/Regex/Match.swift

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ extension Regex {
2828

2929
@available(SwiftStdlib 5.7, *)
3030
extension Regex.Match {
31+
/// The input string, forwarded from the type-erased output.
var input: String {
  let source = anyRegexOutput.input
  return source
}
34+
3135
/// The output produced from the match operation.
3236
public var output: Output {
3337
if Output.self == AnyRegexOutput.self {
@@ -37,33 +41,48 @@ extension Regex.Match {
3741
)
3842

3943
let output = AnyRegexOutput(
40-
input: anyRegexOutput.input,
44+
input: input,
4145
elements: [wholeMatchCapture] + anyRegexOutput._elements
4246
)
4347

4448
return output as! Output
4549
} else if Output.self == Substring.self {
4650
// FIXME: Plumb whole match (`.0`) through the matching engine.
47-
return anyRegexOutput.input[range] as! Output
48-
} else if anyRegexOutput.isEmpty, value != nil {
51+
return input[range] as! Output
52+
} else if anyRegexOutput.isEmpty, let value {
4953
// FIXME: This is a workaround for whole-match values not
5054
// being modeled as part of captures. We might want to
5155
// switch to a model where results are alongside captures
52-
return value! as! Output
56+
return value as! Output
5357
} else {
5458
guard value == nil else {
5559
fatalError("FIXME: what would this mean?")
5660
}
5761
let typeErasedMatch = anyRegexOutput.existentialOutput(
58-
from: anyRegexOutput.input[range]
62+
from: input[range]
5963
)
6064
return typeErasedMatch as! Output
6165
}
6266
}
6367

68+
/// The type of the whole-match element (`.0`): the dynamic type of `value`
/// when one is present, otherwise `Substring`.
var wholeMatchType: Any.Type {
  guard let value else {
    return Substring.self
  }
  return type(of: value)
}
71+
6472
/// Accesses a capture by its name or number.
6573
public subscript<T>(dynamicMember keyPath: KeyPath<Output, T>) -> T {
66-
output[keyPath: keyPath]
74+
// Note: We should be able to get the element offset from the key path
75+
// itself even at compile time. We need a better way of doing this.
76+
guard let outputTupleOffset = MemoryLayout.tupleElementIndex(
77+
of: keyPath, elementTypes: [wholeMatchType] + anyRegexOutput.map(\.type)
78+
) else {
79+
return output[keyPath: keyPath]
80+
}
81+
if outputTupleOffset == 0 {
82+
return value.map { $0 as! T } ?? (input[range] as! T)
83+
} else {
84+
return anyRegexOutput[outputTupleOffset - 1].value as! T
85+
}
6786
}
6887

6988
/// Accesses a capture using the `.0` syntax, even when the match isn't a tuple.
@@ -83,7 +102,7 @@ extension Regex.Match {
83102
}
84103

85104
return element.existentialOutputComponent(
86-
from: anyRegexOutput.input[...]
105+
from: input[...]
87106
) as! Capture
88107
}
89108
}

Tests/RegexBuilderTests/RegexDSLTests.swift

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,19 @@ class RegexDSLTests: XCTestCase {
427427
CharacterClass.digit
428428
}
429429
}
430-
430+
431+
try _testDSLCaptures(
432+
("abcdef2", ("abcdef2", "f")),
433+
matchType: (Substring, Substring??).self, ==)
434+
{
435+
Optionally {
436+
ZeroOrMore {
437+
Capture(CharacterClass.word)
438+
}
439+
CharacterClass.digit
440+
}
441+
}
442+
431443
try _testDSLCaptures(
432444
("aaabbbcccdddeeefff", "aaabbbcccdddeeefff"),
433445
("aaaabbbcccdddeeefff", nil),

0 commit comments

Comments
 (0)