Skip to content

Commit 2da13f9

Browse files
committed
Bug fix and hot path for quantified `.` (swiftlang#658)
Bug fix in newline hot path, and apply hot path to quantified dot
1 parent f93a747 commit 2da13f9

File tree

3 files changed

+29
-16
lines changed

3 files changed

+29
-16
lines changed

Sources/_StringProcessing/Engine/MEBuiltins.swift

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,10 @@ extension Processor {
6363
switch payload.semanticLevel {
6464
case .graphemeCluster:
6565
return input.index(after: currentPosition) == subjectBounds.upperBound
66-
&& input[currentPosition].isNewline
66+
&& input[currentPosition].isNewline
6767
case .unicodeScalar:
6868
return input.unicodeScalars.index(after: currentPosition) == subjectBounds.upperBound
69-
&& input.unicodeScalars[currentPosition].isNewline
69+
&& input.unicodeScalars[currentPosition].isNewline
7070
}
7171

7272
case .endOfSubject: return currentPosition == subjectBounds.upperBound
@@ -121,6 +121,7 @@ extension Processor {
121121

122122
// MARK: Matching `.`
123123
extension String {
124+
// TODO: Should the below have a `limitedBy` parameter?
124125

125126
func _matchAnyNonNewline(
126127
at currentPosition: String.Index,
@@ -155,11 +156,11 @@ extension String {
155156
return .unknown
156157
}
157158
switch asciiValue {
158-
case ._lineFeed, ._carriageReturn:
159-
return .definite(nil)
160-
default:
161-
assert(!isCRLF)
162-
return .definite(next)
159+
case (._lineFeed)...(._carriageReturn):
160+
return .definite(nil)
161+
default:
162+
assert(!isCRLF)
163+
return .definite(next)
163164
}
164165
}
165166

@@ -183,6 +184,7 @@ extension String {
183184

184185
// MARK: - Built-in character class matching
185186
extension String {
187+
// TODO: Should the below have a `limitedBy` parameter?
186188

187189
// Mentioned in ProgrammersManual.md, update docs if redesigned
188190
func _matchBuiltinCC(

Sources/_StringProcessing/Engine/MEQuantify.swift

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,37 @@
11
extension Processor {
22
func _doQuantifyMatch(_ payload: QuantifyPayload) -> Input.Index? {
3-
var next: Input.Index?
3+
// FIXME: is the below updated for scalar semantics?
44
switch payload.type {
55
case .bitset:
6-
next = input.matchBitset(
6+
return input.matchBitset(
77
registers[payload.bitset], at: currentPosition, limitedBy: end)
88
case .asciiChar:
9-
next = input.matchScalar(
9+
return input.matchScalar(
1010
UnicodeScalar.init(_value: UInt32(payload.asciiChar)),
1111
at: currentPosition,
1212
limitedBy: end,
1313
boundaryCheck: true)
1414
case .builtin:
15+
// FIXME: Is a bounds check needed here? The other cases pass `limitedBy: end`; should this be limited by `input.endIndex` or `end`?
16+
1517
// We only emit .quantify if it consumes a single character
16-
next = input._matchBuiltinCC(
18+
return input._matchBuiltinCC(
1719
payload.builtin,
1820
at: currentPosition,
1921
isInverted: payload.builtinIsInverted,
2022
isStrictASCII: payload.builtinIsStrict,
2123
isScalarSemantics: false)
2224
case .any:
23-
// TODO: call out to existing code with quick check
24-
let matched = currentPosition != input.endIndex
25-
&& (!input[currentPosition].isNewline || payload.anyMatchesNewline)
26-
next = matched ? input.index(after: currentPosition) : nil
25+
// FIXME: Should this guard compare against `input.endIndex` or `end` (as the `limitedBy:` cases above do)?
26+
guard currentPosition < input.endIndex else { return nil }
27+
28+
if payload.anyMatchesNewline {
29+
return input.index(after: currentPosition)
30+
}
31+
32+
return input._matchAnyNonNewline(
33+
at: currentPosition, isScalarSemantics: false)
2734
}
28-
return next
2935
}
3036

3137
/// Generic quantify instruction interpreter

Tests/RegexTests/MatchTests.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1891,6 +1891,11 @@ extension RegexTests {
18911891
func testSingleLineMode() {
18921892
firstMatchTest(#".+"#, input: "a\nb", match: "a")
18931893
firstMatchTest(#"(?s:.+)"#, input: "a\nb", match: "a\nb")
1894+
1895+
// By default, `.` treats LF (U+000A), line tabulation (U+000B), FF (U+000C), and CR (U+000D) as newlines
1896+
firstMatchTest(#"."#, input: "\u{A}\u{B}\u{C}\u{D}\nb", match: "b")
1897+
firstMatchTest(#".+"#, input: "\u{A}\u{B}\u{C}\u{D}\nbb", match: "bb")
1898+
18941899
}
18951900

18961901
func testMatchNewlines() {

0 commit comments

Comments
 (0)