wip

milseman · milseman · commit 2b210ac9ba0b · 2025-02-10T13:10:53.000-07:00
diff --git a/stdlib/public/core/UTF8SpanIterators.swift b/stdlib/public/core/UTF8SpanIterators.swift
@@ -11,10 +11,11 @@ extension UTF8Span {
   public struct ScalarIterator: ~Escapable {
     public var codeUnits: UTF8Span
 
-    /// The byte offset of the start of the next scalar. This is 
+    /// The byte offset of the start of the next scalar. This is
     /// always scalar-aligned.
     ///
     /// **TODO**: private(set)?
+    fileprivate(set)
     public var currentCodeUnitOffset: Int
 
     // TODO: underscored init?
@@ -26,7 +27,7 @@ extension UTF8Span {
     /// Decode and return the scalar starting at `currentCodeUnitOffset`.
     /// After the function returns, `currentCodeUnitOffset` holds the
     /// position at the end of the returned scalar, which is also the start
-    /// of the next scalar. 
+    /// of the next scalar.
     ///
     /// Returns `nil` if at the end of the `UTF8Span`.
     public mutating func next() -> Unicode.Scalar? {
@@ -41,7 +42,7 @@ extension UTF8Span {
     /// Decode and return the scalar ending at `currentCodeUnitOffset`. After
     /// the function returns, `currentCodeUnitOffset` holds the position at
     /// the start of the returned scalar, which is also the end of the
-    /// previous scalar. 
+    /// previous scalar.
     ///
     /// Returns `nil` if at the start of the `UTF8Span`.
     public mutating func previous() -> Unicode.Scalar? {
@@ -53,42 +54,43 @@ extension UTF8Span {
       return result
     }
 
-    // **QUESTION**: How should skip(by: Int) APIs be defined? Should they
-    //   implicitly clamp to start/end? Should they return the number of code
-    //   units skipped? number of scalars skipped? 
-    //  
-    //   Code units skipped can be calculated by the caller, but scalars
-    //   skipped (if < n) is harder to figure out. For now, I just return a
-    //   Bool signaling if there weren't enough scalars.
 
     /// Advance `codeUnitOffset` to the end of the current scalar, without
     /// decoding it.
-    public mutating func skipForward() -> Bool {
+    ///
+    /// Returns the number of `Unicode.Scalar`s skipped over, which can be 0
+    /// if at the end of the `UTF8Span`.
+    public mutating func skipForward() -> Int {
       fatalError()
     }
 
     /// Advance `codeUnitOffset` to the end of `n` scalars, without decoding
     /// them.
-    public mutating func skipForward(by n: Int) -> Bool {
+    ///
+    /// Returns the number of `Unicode.Scalar`s skipped over, which can be
+    /// fewer than `n` if at the end of the `UTF8Span`.
+    public mutating func skipForward(by n: Int) -> Int {
       fatalError()
     }
 
     /// Move `codeUnitOffset` to the start of the previous scalar, without
     /// decoding it.
+    ///
+    /// **TODO**: Return the number of `Unicode.Scalar`s skipped, which can be
+    /// 0 at the start of the `UTF8Span`; this still returns `Bool`.
     public mutating func skipBack() -> Bool {
       fatalError()
     }
 
     /// Move `codeUnitOffset` to the start of the previous `n` scalars,
     /// without decoding them.
+    ///
+    /// **TODO**: Return the number of `Unicode.Scalar`s skipped, which can be
+    /// fewer than `n` at the start of the `UTF8Span`; still returns `Bool`.
     public mutating func skipBack(by n: Int) -> Bool {
       fatalError()
     }
 
-    // **QUESTION**: For reset rounding, should we return the rounded position as
-    //   a discardable result? That would make checking if rounding occurred 
-    //   easier, though that might be better served by a isScalarAligned API.
-
     /// Reset to the nearest scalar-aligned code unit offset `<= i`.
     ///
     /// **TODO**: Example
@@ -124,13 +126,6 @@ extension UTF8Span {
       self.currentCodeUnitOffset = i
     }
 
-    // **QUESTION**: Since UTF8Span can only be sliced on scalar-aligned
-    //   positions, and there are multiple levels of semantics to positions
-    //   (e.g. scalar-aligned, `Character`-aligned, `Grapheme-breaking
-    //   aligned`, I'm proposing having slicing be API on iterators rather
-    //   than `_extracting` on UTF8Span. But, is this the most useful
-    //   formulation?
-
     /// Returns the UTF8Span containing all the content up to the iterator's
     /// current position.
     public func _prefix() -> UTF8Span {
@@ -146,8 +141,8 @@ extension UTF8Span {
 }
 
 @available(SwiftStdlib 6.1, *)
+@_unavailableInEmbedded
 extension UTF8Span {
-
   public func _makeCharacterIterator() -> CharacterIterator {
     .init(self)
   }
@@ -168,24 +163,17 @@ extension UTF8Span {
     ///   occasionally be useful, but can yield counter intuitive results.
     ///
     ///   While we talk about code unit offsets always being scalar-aligned,
-    ///   should this type claim them to also be `Character` aligned as
-    ///   defined by the behavior of the iterator itself (i.e. the span is
-    ///   the entirety of the content)? 
-    ///
-    ///   You can get split-the-character behavior by getting the UTF8Span
-    ///   formed by `prefix/suffix` on the scalar iterator if you really want
-    ///   to, so I'm going with this is always `Character`-aligned under the
-    ///   intrepretation of `UTF8Span` as holding the entirety of the
-    ///   content.
+    ///   we could go further to talk about `Character`-aligned indices
+    ///   (where `Character`-alignment is relative to the start of the
+    ///   `UTF8Span`) and have API for those.
 
-    /// The byte offset of the start of the next `Character`. This is 
+    /// The byte offset of the start of the next `Character`. This is
     /// always scalar-aligned and `Character`-aligned.
     ///
     /// **TODO**: How to talk about the
     ///   assuming-the-UTF8Span-is-the-entire-content interpretation of
     ///   `Character`-aligned?
-    ///
-    /// **TODO**: private(set)?
+    fileprivate(set)
     public var currentCodeUnitOffset: Int
 
     // TODO: underscored init?
@@ -197,7 +185,7 @@ extension UTF8Span {
     /// Return the `Character` starting at `currentCodeUnitOffset`. After the
     /// function returns, `currentCodeUnitOffset` holds the position at the
     /// end of the `Character`, which is also the start of the next
-    /// `Character`. 
+    /// `Character`.
     ///
     /// Returns `nil` if at the end of the `UTF8Span`.
     public mutating func next() -> Character? {
@@ -215,7 +203,7 @@ extension UTF8Span {
     /// Return the `Character` ending at `currentCodeUnitOffset`. After the
     /// function returns, `currentCodeUnitOffset` holds the position at the
     /// start of the returned `Character`, which is also the end of the
-    /// previous `Character`. 
+    /// previous `Character`.
     ///
     /// Returns `nil` if at the start of the `UTF8Span`.
     public mutating func previous() -> Character? {
@@ -231,39 +219,56 @@ extension UTF8Span {
 
     /// Advance `codeUnitOffset` to the end of the current `Character`,
     /// without constructing it.
+    ///
+    /// **TODO**: Return the number of `Character`s skipped, which can be 0
+    /// at the end of the `UTF8Span`; returns nothing yet.
     public mutating func skipForward() {
       fatalError()
     }
 
     /// Advance `codeUnitOffset` to the end of `n` `Characters`, without
     /// constructing them.
-    public mutating func skipForward(by n: Int) {}
+    ///
+    /// **TODO**: Return the number of `Character`s skipped, which can be
+    /// fewer than `n` at the end of the `UTF8Span`; returns nothing yet.
+    public mutating func skipForward(by n: Int) {
+      fatalError()
+    }
 
     /// Move `codeUnitOffset` to the start of the previous `Character`,
     /// without constructing it.
+    ///
+    /// **TODO**: Return the number of `Character`s skipped, which can be 0
+    /// at the start of the `UTF8Span`; returns nothing yet.
     public mutating func skipBack() {
       fatalError()
     }
 
     /// Move `codeUnitOffset` to the start of the previous `n` `Character`s,
     /// without constructing them.
+    ///
+    /// **TODO**: Return the number of `Character`s skipped, which can be
+    /// fewer than `n` at the start of the `UTF8Span`; returns nothing yet.
     public mutating func skipBack(by n: Int) {
+      fatalError()
     }
 
     /// Reset to the nearest character-aligned position `<= i`.
     public mutating func reset(roundingBackwardsFrom i: Int) {
+      fatalError()
     }
 
     /// Reset to the nearest character-aligned position `>= i`.
     public mutating func reset(roundingForwardsFrom i: Int) {
+      fatalError()
     }
 
     /// Reset this iterator to code unit offset `i`, skipping _all_ safety
     /// checks.
     ///
     /// Note: This is only for very specific, low-level use cases. If
     /// `codeUnitOffset` is not properly scalar-aligned, this function can
-    /// result in undefined behavior when, e.g., `next()` is called. 
+    /// result in undefined behavior when, e.g., `next()` is called.
     ///
     /// If `i` is scalar-aligned, but not `Character`-aligned, you may get
     /// different results from running `Character` iteration.
@@ -272,6 +277,7 @@ extension UTF8Span {
     /// known-valid previous position.
     ///
     public mutating func reset(uncheckedAssumingAlignedTo i: Int) {
+      fatalError()
     }
 
     /// Returns the UTF8Span containing all the content up to the iterator's