Skip to content

Commit 5114ea4

Browse files
committed
Improve characterAndEnd algorithm a bit
1 parent 3cd121a commit 5114ea4

File tree

1 file changed

+15
-5
lines changed

1 file changed

+15
-5
lines changed

Sources/_StringProcessing/Engine/MEBuiltins.swift

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ extension String {
125125
///
126126
/// This function handles loading a character from a string while respecting
127127
/// an end boundary, even if that end boundary is sub-character or sub-scalar.
128-
128+
///
129129
/// - If `pos` is at or past `end`, this function returns `nil`.
130130
/// - If `end` is between `pos` and the next grapheme cluster boundary (i.e.,
131131
/// `end` is before `self.index(after: pos)`), then the returned character
@@ -145,10 +145,20 @@ extension String {
145145
func characterAndEnd(at pos: String.Index, limitedBy end: String.Index) -> (Character, String.Index)? {
146146
// FIXME: Sink into the stdlib to avoid multiple boundary calculations
147147
guard pos < end else { return nil }
148-
let next = index(pos, offsetBy: 1, limitedBy: end) ?? end
149-
// Substring will round down non-scalar aligned indices
150-
let substr = self[pos..<next]
151-
return substr.first.map { ($0, substr.endIndex) }
148+
let next = index(after: pos)
149+
if next <= end {
150+
return (self[pos], next)
151+
}
152+
153+
// `end` must be a sub-character position that is between `pos` and the
154+
// next grapheme boundary. This is okay if `end` is on a Unicode scalar
155+
// boundary, but if it's in the middle of a scalar's code units, there
156+
// may not be a character to return at all after rounding down. Use
157+
// `Substring`'s rounding to determine what we can return.
158+
let substr = self[pos..<end]
159+
return substr.isEmpty
160+
? nil
161+
: (substr.first!, substr.endIndex)
152162
}
153163

154164
func matchAnyNonNewline(

0 commit comments

Comments
 (0)