-
Notifications
You must be signed in to change notification settings - Fork 49
Add regex-specific Matches and Ranges collections #460
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
fc72a72
8157a38
a94c72c
5dff33a
30c540a
83d1a75
b200e9a
c7dd144
1c59173
ad2dae2
eb010f2
0078264
a110785
f793e3c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -183,13 +183,156 @@ extension BidirectionalCollection { | |
|
||
// MARK: Regex algorithms | ||
|
||
@available(SwiftStdlib 5.7, *) | ||
/// The matches of a regex within a substring.
///
/// The first match is located eagerly when the value is created and is
/// stored as `startIndex`.
struct RegexMatchesCollection<Output> {
  /// The text being searched.
  let input: Substring

  /// The regex whose matches are produced.
  let regex: Regex<Output>

  /// The position of the first match, or `.end` when `input` has no match.
  let startIndex: Index

  /// Creates the collection of matches of `regex` in `base`.
  init(base: Substring, regex: Regex<Output>) {
    input = base
    self.regex = regex

    // Find the first match up front so that `startIndex` is a stored value.
    if let firstMatch = base.firstMatch(of: regex) {
      startIndex = .match(firstMatch)
    } else {
      startIndex = .end
    }
  }
}
|
||
@available(SwiftStdlib 5.7, *) | ||
extension RegexMatchesCollection: Sequence { | ||
/// Returns the position at which the search for the next match should
/// resume after `match`, or `nil` if `match` is an empty match located at
/// the end of `input`.
fileprivate func searchIndex(after match: Regex<Output>.Match) -> String.Index? {
  let matchedRange = match.range

  // A non-empty match: resume exactly where it ended.
  guard matchedRange.isEmpty else {
    return matchedRange.upperBound
  }

  // An empty match at the very end of `input`: there is nothing left to
  // search.
  guard matchedRange.lowerBound != input.endIndex else {
    return nil
  }

  // An empty match anywhere else: step forward by one position so that the
  // same empty match is not found again. The size of that step depends on
  // the semantic level the regex matches at.
  //
  // TODO: Move this semantic-level-aware advance into a shared underscored
  // helper that takes the semantic level as a parameter (review feedback;
  // planned as part of #435).
  switch regex.initialOptions.semanticLevel {
  case .graphemeCluster:
    return input.index(after: matchedRange.upperBound)
  case .unicodeScalar:
    return input.unicodeScalars.index(after: matchedRange.upperBound)
  }
}
|
||
/// Iterates over the successive matches of `base.regex` in `base.input`.
struct Iterator: IteratorProtocol {
  /// The collection whose matches are being iterated.
  let base: RegexMatchesCollection

  /// The first match, which `RegexMatchesCollection` computes eagerly for
  /// its `startIndex`.
  ///
  /// If there is a first match, it is held here until the first call to
  /// `next()` returns it and resets this property to `nil`. Every later
  /// call to `next()` searches from `nextStart` instead.
  var currentMatch: Regex<Output>.Match?

  /// The position at which to search for the match that follows the most
  /// recently found one, or `nil` once iteration has completed.
  var nextStart: String.Index?

  /// Creates an iterator that starts at the first match of `matches`.
  init(_ matches: RegexMatchesCollection) {
    self.base = matches
    self.currentMatch = matches.startIndex.match
    self.nextStart = currentMatch.flatMap(base.searchIndex(after:))
  }

  /// Returns the next match, or `nil` when there are no more matches.
  mutating func next() -> Regex<Output>.Match? {
    // Initial case: hand out the pre-computed first match and clear it so
    // that later calls fall through to the search below.
    if let match = currentMatch {
      currentMatch = nil
      return match
    }

    // `nextStart` is `nil` when iteration has completed.
    guard let start = nextStart else {
      return nil
    }

    // Otherwise, find the next match (if any) and compute `nextStart`.
    // `firstMatch(in:)` is a throwing call; an error raised while matching
    // ends the iteration here instead of trapping the process, which is
    // what `try!` would do.
    let match = try? base.regex.firstMatch(in: base.input[start...])
    nextStart = match.flatMap(base.searchIndex(after:))
    return match
  }
}
|
||
/// Returns an iterator over the matches, starting from the first match.
func makeIterator() -> Iterator {
  return Iterator(self)
}
} | ||
|
||
@available(SwiftStdlib 5.7, *) | ||
extension RegexMatchesCollection: Collection { | ||
/// A position in a match collection: either a specific match or the
/// past-the-end position.
enum Index: Comparable {
  /// The position of the given match.
  case match(Regex<Output>.Match)

  /// The past-the-end position.
  case end

  /// The match stored at this position, or `nil` for `.end`.
  var match: Regex<Output>.Match? {
    if case .match(let stored) = self {
      return stored
    }
    return nil
  }

  /// Two match positions are equal when their matched ranges are equal;
  /// `.end` is equal only to `.end`.
  static func == (lhs: Self, rhs: Self) -> Bool {
    switch (lhs.match, rhs.match) {
    case let (left?, right?):
      return left.range == right.range
    case (nil, nil):
      return true
    case (nil, _?), (_?, nil):
      return false
    }
  }

  /// Orders match positions by their matched ranges, with `.end` after
  /// every match.
  static func < (lhs: Self, rhs: Self) -> Bool {
    // `.end` is never ordered before anything, including another `.end`.
    guard let left = lhs.match else {
      return false
    }
    // Every match is ordered before `.end`.
    guard let right = rhs.match else {
      return true
    }
    // Comparing (lowerBound, upperBound) tuples places an empty range
    // `i..<i` before a non-empty range `i..<j` that starts at the same
    // position. As of 2022-05-30, `Regex` does not return matches of this
    // kind, but that is one behavior under discussion for regexes like
    // /a*|b/ when matched against "b".
    let leftBounds = (left.range.lowerBound, left.range.upperBound)
    let rightBounds = (right.range.lowerBound, right.range.upperBound)
    return leftBounds < rightBounds
  }
}
|
||
/// The past-the-end position of the collection.
var endIndex: Index {
  return .end
}
|
||
/// Returns the position of the match that follows the match at `i`, or
/// `endIndex` when there is no further match.
///
/// - Precondition: `i` is not `endIndex`.
func index(after i: Index) -> Index {
  guard let currentMatch = i.match else {
    fatalError("Can't advance past the 'endIndex' of a match collection.")
  }

  // Resume the search just past the current match. `firstMatch(in:)` is a
  // throwing call; an error raised while matching is treated the same as
  // "no further match" rather than trapping the process, which is what
  // `try!` would do.
  guard
    let start = searchIndex(after: currentMatch),
    let nextMatch = try? regex.firstMatch(in: input[start...])
  else {
    return .end
  }
  return Index.match(nextMatch)
}
|
||
/// Accesses the match stored at `position`.
///
/// - Precondition: `position` is not `endIndex`.
subscript(position: Index) -> Regex<Output>.Match {
  switch position {
  case .match(let stored):
    return stored
  case .end:
    fatalError("Can't subscript the 'endIndex' of a match collection.")
  }
}
} | ||
|
||
extension BidirectionalCollection where SubSequence == Substring { | ||
@available(SwiftStdlib 5.7, *)
@_disfavoredOverload
func _matches<R: RegexComponent>(
  of regex: R
) -> RegexMatchesCollection<R.RegexOutput> {
  // Wrap a full slice of this collection and the component's regex in a
  // regex-specific match collection.
  RegexMatchesCollection(base: self[...], regex: regex.regex)
}
|
||
@available(SwiftStdlib 5.7, *) | ||
|
@@ -207,30 +350,6 @@ extension BidirectionalCollection where SubSequence == Substring { | |
public func matches<Output>(
  of r: some RegexComponent<Output>
) -> [Regex<Output>.Match] {
  // Collect every element of the match collection for `r` into an array.
  Array(_matches(of: r))
}
|
||
} |
Uh oh!
There was an error while loading. Please reload this page.