Skip to content

Commit e9d12a4

Browse files
committed
SQUASH ME: unsafe annotations, final API
1 parent 5fb7692 commit e9d12a4

10 files changed

+261
-174
lines changed

stdlib/public/core/StringNormalization.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ extension _StringGutsSlice {
126126

127127
internal func _fastNFCCheck(_ isNFCQC: inout Bool, _ prevCCC: inout UInt8) {
128128
unsafe withFastUTF8 { utf8 in
129-
isNFCQC = _nfcQuickCheck(utf8, prevCCC: &prevCCC)
129+
isNFCQC = unsafe _nfcQuickCheck(utf8, prevCCC: &prevCCC)
130130
}
131131
}
132132
}

stdlib/public/core/UTF8EncodingError.swift

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ extension Unicode.UTF8 {
6969
multi-byte scalar but is cut off before ending correctly). For all other
7070
errors (including overlong encodings, surrogates, and invalid code
7171
points), it will produce an error per byte.
72-
72+
7373
// FIXME: Without a `checkAllErrors`, we don't have these classification distinctions. Should we drop it, ensure we will do it, or what?
7474

7575
Since overlong encodings, surrogates, and invalid code points are erroneous
@@ -210,3 +210,52 @@ extension UTF8.ValidationError: CustomStringConvertible {
210210
"UTF8.ValidationError(\(kind), \(byteOffsets))"
211211
}
212212
}
213+
214+
@available(SwiftStdlib 6.1, *)
215+
extension UTF8 {
216+
@usableFromInline // for testing purposes
217+
internal static func _checkAllErrors(
218+
_ s: some Sequence<UInt8>
219+
) -> Array<UTF8.ValidationError> {
220+
// TODO: Span fast path
221+
// TODO: Fixed size buffer for non-contig inputs
222+
// TODO: Lifetime-dependent result variant
223+
let cus = Array(s)
224+
return unsafe cus.withUnsafeBytes {
225+
var bufPtr = unsafe $0
226+
var start = 0
227+
var errors: Array<UTF8.ValidationError> = []
228+
229+
// Remember the previous error, so that we can
230+
// apply it to subsequent bytes instead of reporting
231+
// just `.unexpectedContinuation`.
232+
var priorError: UTF8.ValidationError? = nil
233+
while true {
234+
do throws(UTF8.ValidationError) {
235+
_ = unsafe try bufPtr.baseAddress!._validateUTF8(limitedBy: bufPtr.count)
236+
return errors
237+
} catch {
238+
let adjustedRange =
239+
error.byteOffsets.lowerBound + start ..< error.byteOffsets.upperBound + start
240+
241+
let kind: UTF8.ValidationError.Kind
242+
if let prior = priorError,
243+
prior.byteOffsets.upperBound == adjustedRange.lowerBound,
244+
error.kind == .unexpectedContinuationByte
245+
{
246+
kind = prior.kind
247+
} else {
248+
kind = error.kind
249+
}
250+
let adjustedErr = UTF8.ValidationError(kind, adjustedRange)
251+
priorError = adjustedErr
252+
253+
let errEnd = error.byteOffsets.upperBound
254+
start += errEnd
255+
unsafe bufPtr = .init(rebasing: bufPtr[errEnd...])
256+
errors.append(adjustedErr)
257+
}
258+
}
259+
}
260+
}
261+
}

stdlib/public/core/UTF8Span.swift

Lines changed: 49 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
/// TODO: docs
55
@frozen
6+
@safe
67
@available(SwiftStdlib 6.1, *)
78
public struct UTF8Span: Copyable, ~Escapable, BitwiseCopyable {
89
@usableFromInline
@@ -25,26 +26,36 @@ public struct UTF8Span: Copyable, ~Escapable, BitwiseCopyable {
2526

2627
// @_alwaysEmitIntoClient
2728
@inline(__always)
28-
@lifetime(borrow start)
29+
@lifetime(borrow start) // TODO: borrow or copy?
2930
internal init(
3031
_unsafeAssumingValidUTF8 start: borrowing UnsafeRawPointer,
3132
_countAndFlags: UInt64
3233
) {
33-
self._unsafeBaseAddress = copy start
34+
unsafe self._unsafeBaseAddress = copy start
3435
self._countAndFlags = _countAndFlags
3536

3637
_invariantCheck()
3738
}
3839

40+
@unsafe
41+
@lifetime(copy uncheckedCodeUnits) // TODO: borrow or copy?
42+
public init(
43+
unsafeAssumingValidUTF8 uncheckedCodeUnits: Span<UInt8>,
44+
isKnownASCII: Bool = false
45+
) {
46+
self.init(
47+
_uncheckedAssumingValidUTF8: uncheckedCodeUnits,
48+
isKnownASCII: isKnownASCII,
49+
isKnownNFC: false
50+
)
51+
}
52+
3953
// FIXME: we need to make sure ALL APIs are nil-safe, that is, that they
4054
// at least check the count first
4155
@_alwaysEmitIntoClient
4256
internal func _start() -> UnsafeRawPointer {
43-
_unsafeBaseAddress._unsafelyUnwrappedUnchecked
57+
unsafe _unsafeBaseAddress._unsafelyUnwrappedUnchecked
4458
}
45-
46-
// HACK: working around lack of internal plumbing work
47-
internal var _str: String { _start()._str(0..<count) }
4859
}
4960

5061
// TODO: try to convert code to run on Span instead of UnsafeRawPointer (URP)
@@ -67,18 +78,16 @@ extension UTF8Span {
6778
internal init(
6879
_validating codeUnits: consuming Span<UInt8>
6980
) throws(UTF8.ValidationError) {
70-
guard let ptr = codeUnits._pointer else {
71-
self._unsafeBaseAddress = nil
81+
guard let basePtr = unsafe codeUnits._pointer else {
82+
unsafe self._unsafeBaseAddress = nil
7283
self._countAndFlags = 0
7384
return
7485
}
7586

76-
// FIXME: handle empty/null span
77-
let basePtr = codeUnits._start()
7887
let count = codeUnits._count
79-
let isASCII = try basePtr._validateUTF8(limitedBy: count)
88+
let isASCII = unsafe try basePtr._validateUTF8(limitedBy: count)
8089

81-
self._unsafeBaseAddress = .init(basePtr)
90+
unsafe self._unsafeBaseAddress = .init(basePtr)
8291
self._countAndFlags = UInt64(truncatingIfNeeded: count)
8392
if isASCII {
8493
_setIsASCII()
@@ -93,13 +102,13 @@ extension UTF8Span {
93102
isKnownASCII: Bool,
94103
isKnownNFC: Bool
95104
) {
96-
guard let ptr = codeUnits._pointer else {
97-
self._unsafeBaseAddress = nil
105+
guard let ptr = unsafe codeUnits._pointer else {
106+
unsafe self._unsafeBaseAddress = nil
98107
self._countAndFlags = 0
99108
return
100109
}
101110

102-
self._unsafeBaseAddress = codeUnits._start()
111+
unsafe self._unsafeBaseAddress = ptr
103112
self._countAndFlags = UInt64(truncatingIfNeeded: codeUnits.count)
104113
if isKnownASCII {
105114
_setIsASCII()
@@ -109,6 +118,9 @@ extension UTF8Span {
109118
}
110119
_internalInvariant(self.count == codeUnits.count)
111120
}
121+
122+
// HACK: working around lack of internal plumbing work
123+
internal var _str: String { unsafe _start()._str(0..<count) }
112124
}
113125

114126

@@ -135,7 +147,7 @@ extension UTF8Span {
135147
>(
136148
_ body: (_ buffer: /*borrowing*/ UnsafeBufferPointer<UInt8>) throws(E) -> Result
137149
) throws(E) -> Result {
138-
try body(_start()._ubp(0..<count))
150+
try unsafe body(_start()._ubp(0..<count))
139151
}
140152

141153
// TODO: withSpan or similar?
@@ -164,29 +176,43 @@ extension UTF8Span {
164176
public var span: Span<UInt8> {
165177
@lifetime(copy self)
166178
get {
167-
Span(_unchecked: _unsafeBaseAddress, count: self.count)
179+
unsafe Span(_unchecked: _unsafeBaseAddress, count: self.count)
168180
}
169181
}
170182

171183

172184
}
173185

174-
func UNSUPPORTED(_ message: String) -> Never {
175-
fatalError("UNSUPPORTED: \(message)")
176-
}
177-
178186
// TODO(toolchain): decide if we rebase on top of Guillaume's work
179187
@available(SwiftStdlib 6.1, *)
180188
extension String {
181189
public var utf8Span: UTF8Span {
182-
UNSUPPORTED("utf8Span property pending compiler fixes")
190+
@lifetime(borrow self)
191+
borrowing get {
192+
let isKnownASCII = _guts.isASCII
193+
let utf8 = self.utf8
194+
let span = utf8.span
195+
let result = unsafe UTF8Span(
196+
unsafeAssumingValidUTF8: span,
197+
isKnownASCII: isKnownASCII)
198+
return unsafe _overrideLifetime(result, borrowing: self)
199+
}
183200
}
184201
}
185202

186203
@available(SwiftStdlib 6.1, *)
187204
extension Substring {
188205
public var utf8Span: UTF8Span {
189-
UNSUPPORTED("utf8Span property pending compiler fixes")
206+
@lifetime(borrow self)
207+
borrowing get {
208+
let isKnownASCII = base._guts.isASCII
209+
let utf8 = self.utf8
210+
let span = utf8.span
211+
let result = unsafe UTF8Span(
212+
unsafeAssumingValidUTF8: span,
213+
isKnownASCII: isKnownASCII)
214+
return unsafe _overrideLifetime(result, borrowing: self)
215+
}
190216
}
191217
}
192218

stdlib/public/core/UTF8SpanBits.swift

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ extension UTF8Span {
2424
public mutating func checkForASCII() -> Bool {
2525
if isKnownASCII { return true }
2626

27-
let result = _withUnsafeBufferPointer {
28-
_allASCII($0)
27+
let result = unsafe _withUnsafeBufferPointer {
28+
unsafe _allASCII($0)
2929
}
3030
if result {
3131
_setIsASCII()
@@ -72,9 +72,9 @@ extension UTF8Span {
7272
if isKnownNFC { return true }
7373

7474
if quickCheck {
75-
let result = _withUnsafeBufferPointer { utf8 in
75+
let result = unsafe _withUnsafeBufferPointer { utf8 in
7676
var prevCCC: UInt8 = 0
77-
return _nfcQuickCheck(utf8, prevCCC: &prevCCC)
77+
return unsafe _nfcQuickCheck(utf8, prevCCC: &prevCCC)
7878
}
7979
if result {
8080
self._countAndFlags |= Self._nfcBit
@@ -84,7 +84,7 @@ extension UTF8Span {
8484

8585
// TODO: use faster internal algorithm
8686
let normalized = _str._nfcCodeUnits
87-
guard _start()._urbp(
87+
guard unsafe _start()._urbp(
8888
0..<count
8989
).elementsEqual(normalized) else {
9090
return false

stdlib/public/core/UTF8SpanComparisons.swift

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ extension UTF8Span {
66
/// Whether this span has the same bytes as `other`.
77
@_alwaysEmitIntoClient
88
public func bytesEqual(to other: some Sequence<UInt8>) -> Bool {
9-
_withUnsafeBufferPointer { $0.elementsEqual(other) }
9+
unsafe _withUnsafeBufferPointer { unsafe $0.elementsEqual(other) }
1010
}
1111

1212
/// Whether this span has the same `Unicode.Scalar`s as `other`.
@@ -57,9 +57,9 @@ extension UTF8Span {
5757
public func isCanonicallyEquivalent(
5858
to other: UTF8Span
5959
) -> Bool {
60-
self._withUnsafeBufferPointer { selfBufPtr in
61-
other._withUnsafeBufferPointer { otherBufPtr in
62-
_stringCompareFastUTF8(
60+
unsafe self._withUnsafeBufferPointer { selfBufPtr in
61+
unsafe other._withUnsafeBufferPointer { otherBufPtr in
62+
unsafe _stringCompareFastUTF8(
6363
selfBufPtr,
6464
otherBufPtr,
6565
expecting: .equal,
@@ -73,9 +73,9 @@ extension UTF8Span {
7373
public func isCanonicallyLessThan(
7474
_ other: UTF8Span
7575
) -> Bool {
76-
self._withUnsafeBufferPointer { selfBufPtr in
77-
other._withUnsafeBufferPointer { otherBufPtr in
78-
_stringCompareFastUTF8(
76+
unsafe self._withUnsafeBufferPointer { selfBufPtr in
77+
unsafe other._withUnsafeBufferPointer { otherBufPtr in
78+
unsafe _stringCompareFastUTF8(
7979
selfBufPtr,
8080
otherBufPtr,
8181
expecting: .less,
@@ -85,15 +85,16 @@ extension UTF8Span {
8585
}
8686
}
8787

88-
@available(SwiftStdlib 6.1, *)
89-
extension UTF8Span {
90-
public static func ~=(_ lhs: StaticString, _ rhs: UTF8Span) -> Bool {
91-
return lhs.withUTF8Buffer { str in
92-
rhs._withUnsafeBufferPointer { span in
93-
str.elementsEqual(span)
94-
}
95-
}
96-
}
97-
}
88+
// // FIXME: remove
89+
// @available(SwiftStdlib 6.1, *)
90+
// extension UTF8Span {
91+
// public static func ~=(_ lhs: StaticString, _ rhs: UTF8Span) -> Bool {
92+
// return lhs.withUTF8Buffer { str in
93+
// rhs._withUnsafeBufferPointer { span in
94+
// str.elementsEqual(span)
95+
// }
96+
// }
97+
// }
98+
// }
9899

99100

0 commit comments

Comments
 (0)