Skip to content

Commit 2f2b07a

Browse files
committed
Obtain match output elements without materializing the output.
Resolves #267.
1 parent 32afc43 commit 2f2b07a

File tree

3 files changed

+74
-10
lines changed

3 files changed

+74
-10
lines changed

Sources/_RegexParser/Utility/TypeConstruction.swift

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,3 +139,46 @@ public enum TypeConstruction {
139139
return _openExistential(childType, do: helper)
140140
}
141141
}
142+
143+
extension MemoryLayout {
  /// Returns the element index that corresponds to the given tuple element key
  /// path.
  ///
  /// The index is recovered by comparing the byte offset stored in `keyPath`
  /// against the offsets obtained by laying out `elementTypes` in order, each
  /// aligned up to its own alignment.
  ///
  /// - Parameters:
  ///   - keyPath: The key path from a tuple to one of its elements.
  ///   - elementTypes: The element types of the tuple type, in order.
  /// - Returns: The zero-based index of the element whose offset matches the
  ///   key path's byte offset, or `nil` if the key path has no byte offset, if
  ///   a zero-sized element is encountered before a match is found, or if no
  ///   element matches.
  // TODO: It's possible to get element types from the type metadata, but it's
  // more efficient to pass them in since we already know them in the matching
  // engine.
  public static func tupleElementIndex<ElementTypes: Collection>(
    of keyPath: PartialKeyPath<T>,
    elementTypes: ElementTypes
  ) -> Int? where ElementTypes.Element == Any.Type {
    guard let byteOffset = offset(of: keyPath) else {
      return nil
    }
    // An offset of zero is taken to be the first element.
    if byteOffset == 0 { return 0 }

    // Opens a type-erased metatype to read its size and alignment mask
    // (alignment - 1). The type parameter is deliberately not named `T` so it
    // does not shadow `MemoryLayout`'s own generic parameter.
    func sizeAndAlignMask<U>(_: U.Type) -> (Int, Int) {
      (MemoryLayout<U>.size, MemoryLayout<U>.alignment - 1)
    }

    var currentOffset = 0
    for (index, type) in elementTypes.enumerated() {
      // The size must be bound before it is inspected below.
      let (size, alignMask) = _openExistential(type, do: sizeAndAlignMask)
      // The ABI of an offset-based key path only stores the byte offset, so
      // this doesn't work if there's a 0-sized element, e.g. `Void`,
      // `(Void, Void)`.
      if size == 0 {
        return nil
      }
      // Align up the offset for this type.
      currentOffset = (currentOffset + alignMask) & ~alignMask
      // If it matches the offset we are looking for, `index` is the tuple
      // element index.
      if currentOffset == byteOffset {
        return index
      }
      // Advance to the past-the-end offset for this element.
      currentOffset += size
    }
    return nil
  }
}

Sources/_StringProcessing/Regex/AnyRegexOutput.swift

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,11 @@ extension AnyRegexOutput: RandomAccessCollection {
6262

6363
/// The captured value, `nil` for no-capture
6464
public var value: Any? {
65-
// FIXME: Should this return the substring for default-typed
66-
// values?
67-
representation.value
65+
representation.value ?? substring
66+
}
67+
68+
internal var type: Any.Type {
69+
value.map { Swift.type(of: $0) } ?? Substring.self
6870
}
6971

7072
/// The name of this capture, if it has one, otherwise `nil`.

Sources/_StringProcessing/Regex/Match.swift

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ extension Regex {
2828

2929
@available(SwiftStdlib 5.7, *)
3030
extension Regex.Match {
31+
var input: String {
32+
anyRegexOutput.input
33+
}
34+
3135
/// The output produced from the match operation.
3236
public var output: Output {
3337
if Output.self == AnyRegexOutput.self {
@@ -37,33 +41,48 @@ extension Regex.Match {
3741
)
3842

3943
let output = AnyRegexOutput(
40-
input: anyRegexOutput.input,
44+
input: input,
4145
elements: [wholeMatchCapture] + anyRegexOutput._elements
4246
)
4347

4448
return output as! Output
4549
} else if Output.self == Substring.self {
4650
// FIXME: Plumb whole match (`.0`) through the matching engine.
47-
return anyRegexOutput.input[range] as! Output
48-
} else if anyRegexOutput.isEmpty, value != nil {
51+
return input[range] as! Output
52+
} else if anyRegexOutput.isEmpty, let value {
4953
// FIXME: This is a workaround for whole-match values not
5054
// being modeled as part of captures. We might want to
5155
// switch to a model where results are alongside captures
52-
return value! as! Output
56+
return value as! Output
5357
} else {
5458
guard value == nil else {
5559
fatalError("FIXME: what would this mean?")
5660
}
5761
let typeErasedMatch = anyRegexOutput.existentialOutput(
58-
from: anyRegexOutput.input[range]
62+
from: input[range]
5963
)
6064
return typeErasedMatch as! Output
6165
}
6266
}
6367

68+
var wholeMatchType: Any.Type {
69+
value.map { type(of: $0) } ?? Substring.self
70+
}
71+
6472
/// Accesses a capture by its name or number.
6573
public subscript<T>(dynamicMember keyPath: KeyPath<Output, T>) -> T {
66-
output[keyPath: keyPath]
74+
// Note: We should be able to get the element offset from the key path
75+
// itself even at compile time. We need a better way of doing this.
76+
guard let outputTupleOffset = MemoryLayout.tupleElementIndex(
77+
of: keyPath, elementTypes: [wholeMatchType] + anyRegexOutput.map(\.type)
78+
) else {
79+
return output[keyPath: keyPath]
80+
}
81+
if outputTupleOffset == 0 {
82+
return value.map { $0 as! T } ?? (input[range] as! T)
83+
} else {
84+
return anyRegexOutput[outputTupleOffset - 1].value as! T
85+
}
6786
}
6887

6988
/// Accesses a capture using the `.0` syntax, even when the match isn't a tuple.
@@ -83,7 +102,7 @@ extension Regex.Match {
83102
}
84103

85104
return element.existentialOutputComponent(
86-
from: anyRegexOutput.input[...]
105+
from: input[...]
87106
) as! Capture
88107
}
89108
}

0 commit comments

Comments
 (0)