Skip to content

Commit 2b210ac

Browse files
committed
wip
1 parent a669c65 commit 2b210ac

File tree

1 file changed

+46
-40
lines changed

1 file changed

+46
-40
lines changed

stdlib/public/core/UTF8SpanIterators.swift

Lines changed: 46 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ extension UTF8Span {
1111
public struct ScalarIterator: ~Escapable {
1212
public var codeUnits: UTF8Span
1313

14-
/// The byte offset of the start of the next scalar. This is
14+
/// The byte offset of the start of the next scalar. This is
1515
/// always scalar-aligned.
1616
///
1717
/// **TODO**: private(set)?
18+
fileprivate(set)
1819
public var currentCodeUnitOffset: Int
1920

2021
// TODO: underscored init?
@@ -26,7 +27,7 @@ extension UTF8Span {
2627
/// Decode and return the scalar starting at `currentCodeUnitOffset`.
2728
/// After the function returns, `currentCodeUnitOffset` holds the
2829
/// position at the end of the returned scalar, which is also the start
29-
/// of the next scalar.
30+
/// of the next scalar.
3031
///
3132
/// Returns `nil` if at the end of the `UTF8Span`.
3233
public mutating func next() -> Unicode.Scalar? {
@@ -41,7 +42,7 @@ extension UTF8Span {
4142
/// Decode and return the scalar ending at `currentCodeUnitOffset`. After
4243
/// the function returns, `currentCodeUnitOffset` holds the position at
4344
/// the start of the returned scalar, which is also the end of the
44-
/// previous scalar.
45+
/// previous scalar.
4546
///
4647
/// Returns `nil` if at the start of the `UTF8Span`.
4748
public mutating func previous() -> Unicode.Scalar? {
@@ -53,42 +54,43 @@ extension UTF8Span {
5354
return result
5455
}
5556

56-
// **QUESTION**: How should skip(by: Int) APIs be defined? Should they
57-
// implicitly clamp to start/end? Should they return the number of code
58-
// units skipped? number of scalars skipped?
59-
//
60-
// Code units skipped can be calculated by the caller, but scalars
61-
// skipped (if < n) is harder to figure out. For now, I just return a
62-
// Bool signaling if there weren't enough scalars.
6357

6458
/// Advance `currentCodeUnitOffset` to the end of the current scalar, without
6559
/// decoding it.
66-
public mutating func skipForward() -> Bool {
60+
///
61+
/// Returns the number of `Unicode.Scalar`s skipped over, which can be 0
62+
/// if at the end of the UTF8Span.
63+
public mutating func skipForward() -> Int {
6764
fatalError()
6865
}
6966

7067
/// Advance `currentCodeUnitOffset` to the end of `n` scalars, without decoding
7168
/// them.
72-
public mutating func skipForward(by n: Int) -> Bool {
69+
///
70+
/// Returns the number of `Unicode.Scalar`s skipped over, which can be
71+
/// fewer than `n` if at the end of the UTF8Span.
72+
public mutating func skipForward(by n: Int) -> Int {
7373
fatalError()
7474
}
7575

7676
/// Move `currentCodeUnitOffset` to the start of the previous scalar, without
7777
/// decoding it.
78+
///
79+
/// Returns the number of `Unicode.Scalar`s skipped over, which can be 0
80+
/// if at the start of the UTF8Span.
7881
public mutating func skipBack() -> Bool {
7982
fatalError()
8083
}
8184

8285
/// Move `currentCodeUnitOffset` to the start of the previous `n` scalars,
8386
/// without decoding them.
87+
///
88+
/// Returns the number of `Unicode.Scalar`s skipped over, which can be
89+
/// fewer than `n` if at the start of the UTF8Span.
8490
public mutating func skipBack(by n: Int) -> Bool {
8591
fatalError()
8692
}
8793

88-
// **QUESTION**: For reset rounding, should we return the rounded position as
89-
// a discardable result? That would make checking if rounding occurred
90-
// easier, though that might be better served by a isScalarAligned API.
91-
9294
/// Reset to the nearest scalar-aligned code unit offset `<= i`.
9395
///
9496
/// **TODO**: Example
@@ -124,13 +126,6 @@ extension UTF8Span {
124126
self.currentCodeUnitOffset = i
125127
}
126128

127-
// **QUESTION**: Since UTF8Span can only be sliced on scalar-aligned
128-
// positions, and there are multiple levels of semantics to positions
129-
// (e.g. scalar-aligned, `Character`-aligned, `Grapheme-breaking
130-
// aligned`, I'm proposing having slicing be API on iterators rather
131-
// than `_extracting` on UTF8Span. But, is this the most useful
132-
// formulation?
133-
134129
/// Returns the UTF8Span containing all the content up to the iterator's
135130
/// current position.
136131
public func _prefix() -> UTF8Span {
@@ -146,8 +141,8 @@ extension UTF8Span {
146141
}
147142

148143
@available(SwiftStdlib 6.1, *)
144+
@_unavailableInEmbedded
149145
extension UTF8Span {
150-
151146
public func _makeCharacterIterator() -> CharacterIterator {
152147
.init(self)
153148
}
@@ -168,24 +163,17 @@ extension UTF8Span {
168163
/// occasionally be useful, but can yield counter intuitive results.
169164
///
170165
/// While we talk about code unit offsets always being scalar-aligned,
171-
/// should this type claim them to also be `Character` aligned as
172-
/// defined by the behavior of the iterator itself (i.e. the span is
173-
/// the entirety of the content)?
174-
///
175-
/// You can get split-the-character behavior by getting the UTF8Span
176-
/// formed by `prefix/suffix` on the scalar iterator if you really want
177-
/// to, so I'm going with this is always `Character`-aligned under the
178-
/// interpretation of `UTF8Span` as holding the entirety of the
179-
/// content.
166+
/// we could go further to talk about `Character` aligned indices
167+
/// (where `Character`-alignment is relative to the start of the
168+
/// `UTF8Span`) and have API for those.
180169

181-
/// The byte offset of the start of the next `Character`. This is
170+
/// The byte offset of the start of the next `Character`. This is
182171
/// always scalar-aligned and `Character`-aligned.
183172
///
184173
/// **TODO**: How to talk about the
185174
/// assuming-the-UTF8Span-is-the-entire-content interpretation of
186175
/// `Character`-aligned?
187-
///
188-
/// **TODO**: private(set)?
176+
fileprivate(set)
189177
public var currentCodeUnitOffset: Int
190178

191179
// TODO: underscored init?
@@ -197,7 +185,7 @@ extension UTF8Span {
197185
/// Return the `Character` starting at `currentCodeUnitOffset`. After the
198186
/// function returns, `currentCodeUnitOffset` holds the position at the
199187
/// end of the `Character`, which is also the start of the next
200-
/// `Character`.
188+
/// `Character`.
201189
///
202190
/// Returns `nil` if at the end of the `UTF8Span`.
203191
public mutating func next() -> Character? {
@@ -215,7 +203,7 @@ extension UTF8Span {
215203
/// Return the `Character` ending at `currentCodeUnitOffset`. After the
216204
/// function returns, `currentCodeUnitOffset` holds the position at the
217205
/// start of the returned `Character`, which is also the end of the
218-
/// previous `Character`.
206+
/// previous `Character`.
219207
///
220208
/// Returns `nil` if at the start of the `UTF8Span`.
221209
public mutating func previous() -> Character? {
@@ -231,39 +219,56 @@ extension UTF8Span {
231219

232220
/// Advance `currentCodeUnitOffset` to the end of the current `Character`,
233221
/// without constructing it.
222+
///
223+
/// Returns the number of `Character`s skipped over, which can be 0
224+
/// if at the end of the UTF8Span.
234225
public mutating func skipForward() {
235226
fatalError()
236227
}
237228

238229
/// Advance `currentCodeUnitOffset` to the end of `n` `Character`s, without
239230
/// constructing them.
240-
public mutating func skipForward(by n: Int) {}
231+
///
232+
/// Returns the number of `Character`s skipped over, which can be
233+
/// fewer than `n` if at the end of the UTF8Span.
234+
public mutating func skipForward(by n: Int) {
235+
fatalError()
236+
}
241237

242238
/// Move `currentCodeUnitOffset` to the start of the previous `Character`,
243239
/// without constructing it.
240+
///
241+
/// Returns the number of `Character`s skipped over, which can be 0
242+
/// if at the start of the UTF8Span.
244243
public mutating func skipBack() {
245244
fatalError()
246245
}
247246

248247
/// Move `currentCodeUnitOffset` to the start of the previous `n` `Character`s,
249248
/// without constructing them.
249+
///
250+
/// Returns the number of `Character`s skipped over, which can be
251+
/// fewer than `n` if at the start of the UTF8Span.
250252
public mutating func skipBack(by n: Int) {
253+
fatalError()
251254
}
252255

253256
/// Reset to the nearest character-aligned position `<= i`.
254257
public mutating func reset(roundingBackwardsFrom i: Int) {
258+
fatalError()
255259
}
256260

257261
/// Reset to the nearest character-aligned position `>= i`.
258262
public mutating func reset(roundingForwardsFrom i: Int) {
263+
fatalError()
259264
}
260265

261266
/// Reset this iterator to code unit offset `i`, skipping _all_ safety
262267
/// checks.
263268
///
264269
/// Note: This is only for very specific, low-level use cases. If
265270
/// `i` is not properly scalar-aligned, this function can
266-
/// result in undefined behavior when, e.g., `next()` is called.
271+
/// result in undefined behavior when, e.g., `next()` is called.
267272
///
268273
/// If `i` is scalar-aligned, but not `Character`-aligned, you may get
269274
/// different results from running `Character` iteration.
@@ -272,6 +277,7 @@ extension UTF8Span {
272277
/// known-valid previous position.
273278
///
274279
public mutating func reset(uncheckedAssumingAlignedTo i: Int) {
280+
fatalError()
275281
}
276282

277283
/// Returns the UTF8Span containing all the content up to the iterator's

0 commit comments

Comments
 (0)