Skip to content

Commit 93794cd

Browse files
authored
Fix bug in word boundary caching (#769)
1 parent ce8f124 commit 93794cd

File tree

2 files changed

+17
-6
lines changed

2 files changed

+17
-6
lines changed

Sources/_StringProcessing/Unicode/WordBreaking.swift

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -81,18 +81,18 @@ extension String {
8181
}
8282

8383
if #available(SwiftStdlib 5.7, *) {
84-
var indices: Set<String.Index> = []
84+
if cache == nil {
85+
cache = []
86+
}
8587
var j = maxIndex ?? range.lowerBound
8688

8789
while j < range.upperBound, j <= i {
88-
indices.insert(j)
90+
cache!.insert(j)
8991
j = _wordIndex(after: j)
9092
}
9193

92-
cache = indices
9394
maxIndex = j
94-
95-
return indices.contains(i)
95+
return cache!.contains(i)
9696
} else {
9797
return false
9898
}

Tests/RegexTests/MatchTests.swift

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2381,7 +2381,18 @@ extension RegexTests {
23812381
XCTAssertTrue("cafe".contains(caseInsensitiveRegex))
23822382
XCTAssertTrue("CaFe".contains(caseInsensitiveRegex))
23832383
}
2384-
2384+
2385+
// https://github.com/swiftlang/swift-experimental-string-processing/issues/768
2386+
func testWordBoundaryCaching() throws {
2387+
// This will first find word boundaries up until the middle before failing,
2388+
// then it will find word boundaries until late in the string, then fail,
2389+
// and finally should succeed on a word boundary cached from the first
2390+
// attempt.
2391+
let input = "first second third fourth"
2392+
let regex = try Regex(#".*second\bX|.*third\bX|.*first\b"#)
2393+
XCTAssertTrue(input.contains(regex))
2394+
}
2395+
23852396
// MARK: Character Semantics
23862397

23872398
var eComposed: String { "é" }

0 commit comments

Comments
 (0)