Skip to content

Commit bb6d0ce

Browse files
committed
wip: working iterator tests
1 parent 7deb56c commit bb6d0ce

6 files changed

+60
-26
lines changed

stdlib/public/core/UTF8Span.swift

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
@frozen
66
@available(SwiftStdlib 6.1, *)
77
public struct UTF8Span: Copyable, ~Escapable, BitwiseCopyable {
8-
public var unsafeBaseAddress: UnsafeRawPointer
8+
public var unsafeBaseAddress: UnsafeRawPointer?
99

1010
/*
1111
A bit-packed count and flags (such as isASCII)
@@ -50,24 +50,38 @@ public struct UTF8Span: Copyable, ~Escapable, BitwiseCopyable {
5050

5151
_invariantCheck()
5252
}
53+
54+
// FIXME: we need to make sure ALL APIs are nil-safe, that is, they
55+
// at least check the count first
56+
@_alwaysEmitIntoClient
57+
internal func _start() -> UnsafeRawPointer {
58+
unsafeBaseAddress._unsafelyUnwrappedUnchecked
59+
}
5360
}
5461

5562
// TODO: init strategy: underscored public that use lifetime annotations
5663

64+
// TODO: try to convert code to be run on Span instead of URP
65+
5766
@available(SwiftStdlib 6.1, *)
5867
extension UTF8Span {
5968
// TODO: this doesn't need to be underscored, I don't think
6069
@lifetime(codeUnits)
6170
public init(
6271
_validating codeUnits: consuming Span<UInt8>
6372
) throws(UTF8.EncodingError) {
64-
// TODO: handle empty/null span
65-
66-
self.unsafeBaseAddress = .init(codeUnits._start())
73+
guard let ptr = codeUnits._pointer else {
74+
self.unsafeBaseAddress = nil
75+
self._countAndFlags = 0
76+
return
77+
}
6778

79+
// FIXME: handle empty/null span
80+
let basePtr = codeUnits._start()
6881
let count = codeUnits._count
69-
let isASCII = try unsafeBaseAddress._validateUTF8(limitedBy: count)
82+
let isASCII = try basePtr._validateUTF8(limitedBy: count)
7083

84+
self.unsafeBaseAddress = .init(basePtr)
7185
self._countAndFlags = UInt64(truncatingIfNeeded: count)
7286
if isASCII {
7387
_setIsASCII()
@@ -153,7 +167,7 @@ extension UTF8Span {
153167
@available(SwiftStdlib 6.1, *)
154168
extension UTF8Span {
155169
// HACK: working around lack of internals
156-
internal var _str: String { unsafeBaseAddress._str(0..<count) }
170+
internal var _str: String { _start()._str(0..<count) }
157171

158172
/// Whether `self` is equivalent to `other` under Unicode Canonical
159173
/// Equivalence.
@@ -197,7 +211,7 @@ extension UTF8Span {
197211
>(
198212
_ body: (_ buffer: /*borrowing*/ UnsafeBufferPointer<UInt8>) throws(E) -> Result
199213
) throws(E) -> Result {
200-
try body(unsafeBaseAddress._ubp(0..<count))
214+
try body(_start()._ubp(0..<count))
201215
}
202216

203217
// TODO: withSpan or similar?
@@ -212,7 +226,7 @@ extension UTF8Span {
212226
#if DEBUG
213227
if isNullTerminatedCString {
214228
_internalInvariant(
215-
unsafeBaseAddress.load(fromByteOffset: count, as: UInt8.self) == 0)
229+
_start().load(fromByteOffset: count, as: UInt8.self) == 0)
216230
// TODO: byte scan for no interior nulls...
217231
}
218232
#endif

stdlib/public/core/UTF8SpanBits.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ extension UTF8Span {
7373

7474
// TODO: use faster internal algorithm
7575
let normalized = _str._nfcCodeUnits
76-
guard unsafeBaseAddress._urbp(
76+
guard _start()._urbp(
7777
0..<count
7878
).elementsEqual(normalized) else {
7979
return false

stdlib/public/core/UTF8SpanFundamentals.swift

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ extension UTF8Span {
1717
internal func isScalarAligned(unchecked i: Int) -> Bool {
1818
if i == count || i == 0 { return true }
1919
_internalInvariant(boundsCheck(i))
20-
return unsafeBaseAddress._isScalarAligned(i)
20+
return _start()._isScalarAligned(i)
2121
}
2222

2323
/// Whether `range`'s bounds are aligned to `Unicode.Scalar` boundaries.
@@ -76,7 +76,7 @@ extension UTF8Span {
7676
) -> Int {
7777
_internalInvariant(boundsCheck(i))
7878
_internalInvariant(isScalarAligned(i))
79-
return unsafeBaseAddress._nextScalarStart(i)
79+
return _start()._nextScalarStart(i)
8080
}
8181

8282
/// Returns the start of the `Unicode.Scalar` ending at `i`, i.e. the scalar
@@ -123,7 +123,7 @@ extension UTF8Span {
123123
) -> Int {
124124
_internalInvariant(boundsCheck(i&-1))
125125
_internalInvariant(isScalarAligned(i))
126-
return unsafeBaseAddress._previousScalarStart(i)
126+
return _start()._previousScalarStart(i)
127127
}
128128

129129
/// Decode the `Unicode.Scalar` starting at `i`. Return it and the start of
@@ -171,7 +171,7 @@ extension UTF8Span {
171171
) -> (Unicode.Scalar, nextScalarStart: Int) {
172172
_internalInvariant(boundsCheck(i))
173173
_internalInvariant(isScalarAligned(i))
174-
return unsafeBaseAddress._decodeScalar(startingAt: i)
174+
return _start()._decodeScalar(startingAt: i)
175175
}
176176

177177
/// Decode the `Unicode.Scalar` ending at `i`, i.e. the previous scalar.
@@ -219,7 +219,7 @@ extension UTF8Span {
219219
) -> (Unicode.Scalar, previousScalarStart: Int) {
220220
_internalInvariant(boundsCheck(i &- 1))
221221
_internalInvariant(isScalarAligned(i))
222-
return unsafeBaseAddress._decodeScalar(endingAt: i)
222+
return _start()._decodeScalar(endingAt: i)
223223
}
224224
}
225225

@@ -240,7 +240,7 @@ extension UTF8Span {
240240
@_alwaysEmitIntoClient
241241
internal func scalarAlignBackwards(unchecked i: Int) -> Int {
242242
_internalInvariant(boundsCheck(i))
243-
return unsafeBaseAddress._scalarAlign(i)
243+
return _start()._scalarAlign(i)
244244
}
245245

246246
/// Find the nearest scalar-aligned position `>= i`.
@@ -290,7 +290,7 @@ extension UTF8Span {
290290
internal func isCharacterAligned(unchecked i: Int) -> Bool {
291291
if i == count || i == 0 { return true }
292292
_internalInvariant(boundsCheck(i))
293-
return unsafeBaseAddress._isCharacterAligned(i, limitedBy: count)
293+
return _start()._isCharacterAligned(i, limitedBy: count)
294294
}
295295

296296
/// Returns the start of the next `Character` (i.e. grapheme cluster) after
@@ -336,7 +336,7 @@ extension UTF8Span {
336336
) -> Int {
337337
_internalInvariant(boundsCheck(i))
338338
_internalInvariant(isCharacterAligned(i))
339-
return unsafeBaseAddress._nextCharacterStart(i, limitedBy: count)
339+
return _start()._nextCharacterStart(i, limitedBy: count)
340340
}
341341

342342
/// Returns the start of the `Character` (i.e. grapheme cluster) ending at
@@ -382,7 +382,7 @@ extension UTF8Span {
382382
) -> Int {
383383
_internalInvariant(boundsCheck(i&-1))
384384
_internalInvariant(isCharacterAligned(i))
385-
return unsafeBaseAddress._previousCharacterStart(i, limitedBy: count)
385+
return _start()._previousCharacterStart(i, limitedBy: count)
386386
}
387387

388388
/// Decode the `Character` starting at `i`. Return it and the start of the
@@ -429,7 +429,7 @@ extension UTF8Span {
429429
) -> (Character, nextCharacterStart: Int) {
430430
_internalInvariant(boundsCheck(i))
431431
_internalInvariant(isCharacterAligned(i))
432-
return unsafeBaseAddress._decodeCharacter(
432+
return _start()._decodeCharacter(
433433
startingAt: i, limitedBy: count)
434434
}
435435

@@ -475,7 +475,7 @@ extension UTF8Span {
475475
) -> (Character, Int) {
476476
_internalInvariant(boundsCheck(i &- 1))
477477
_internalInvariant(isCharacterAligned(i))
478-
return unsafeBaseAddress._decodeCharacter(
478+
return _start()._decodeCharacter(
479479
endingAt: i, limitedBy: count)
480480
}
481481

stdlib/public/core/UTF8SpanIterators.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ extension UTF8Span {
3333
guard currentCodeUnitOffset < codeUnits.count else { return nil }
3434

3535
_internalInvariant(codeUnits.isScalarAligned(currentCodeUnitOffset))
36-
let (result, newPos) = codeUnits.unsafeBaseAddress._decodeScalar(startingAt: currentCodeUnitOffset)
36+
let (result, newPos) = codeUnits._start()._decodeScalar(startingAt: currentCodeUnitOffset)
3737
self.currentCodeUnitOffset = newPos
3838
return result
3939
}
@@ -48,7 +48,7 @@ extension UTF8Span {
4848
guard currentCodeUnitOffset > 0 else { return nil }
4949

5050
_internalInvariant(codeUnits.isScalarAligned(currentCodeUnitOffset))
51-
let (result, newPos) = codeUnits.unsafeBaseAddress._decodeScalar(endingAt: currentCodeUnitOffset)
51+
let (result, newPos) = codeUnits._start()._decodeScalar(endingAt: currentCodeUnitOffset)
5252
self.currentCodeUnitOffset = newPos
5353
return result
5454
}
@@ -204,7 +204,7 @@ extension UTF8Span {
204204
guard currentCodeUnitOffset < codeUnits.count else { return nil }
205205

206206
_internalInvariant(codeUnits.isScalarAligned(currentCodeUnitOffset))
207-
let (result, newPos) = codeUnits.unsafeBaseAddress._decodeCharacter(
207+
let (result, newPos) = codeUnits._start()._decodeCharacter(
208208
startingAt: currentCodeUnitOffset,
209209
limitedBy: codeUnits.count
210210
)
@@ -222,7 +222,7 @@ extension UTF8Span {
222222
guard currentCodeUnitOffset > 0 else { return nil }
223223

224224
_internalInvariant(codeUnits.isScalarAligned(currentCodeUnitOffset))
225-
let (result, newPos) = codeUnits.unsafeBaseAddress._decodeCharacter(
225+
let (result, newPos) = codeUnits._start()._decodeCharacter(
226226
endingAt: currentCodeUnitOffset,
227227
limitedBy: codeUnits.count)
228228
self.currentCodeUnitOffset = newPos

test/stdlib/UTF8EncodingErrorTests.swift

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,14 @@ private struct ValidationTestCase {
211211

212212
if #available(SwiftStdlib 6.1, *) {
213213
suite.test("UTF8Span/encoding errors") {
214-
func test(_ t: ValidationTestCase) {
215-
t.run()
214+
func test(
215+
_ bytes: Array<UInt8>,
216+
_ errors: ValidationError...,
217+
_ file: String = #file, line: UInt = #line
218+
) {
219+
ValidationTestCase(
220+
bytes, errors, file, line
221+
).run()
216222
}
217223

218224
// Valid string

test/stdlib/UTF8SpanIteratorTests.swift

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ struct ContentEquivalenceTestCase {
6868
}
6969
expectNil(stringRevIter.next(), stackTrace: loc)
7070

71+
// TODO: test various skip(by) API
72+
73+
// TODO: test reset variants
74+
75+
// TODO: test prefix/suffix
76+
7177
}
7278
}
7379

@@ -87,13 +93,21 @@ struct ContentEquivalenceTestCase {
8793
expectEqual(char, stringRevIter.next(), stackTrace: loc)
8894
}
8995
expectNil(stringRevIter.next(), stackTrace: loc)
96+
97+
98+
// TODO: test various skip(by) API
99+
100+
101+
// TODO: test reset variants
90102
}
91103
}
92104

93105
func run() {
94106
testBytes()
95107
testScalars()
96108
testCharacters()
109+
110+
// TODO: test grapheme break iterator
97111
}
98112

99113
}

0 commit comments

Comments
 (0)