Skip to content

UTF8Span #78531

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 11, 2025
Merged

UTF8Span #78531

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Runtimes/Core/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,13 @@ add_library(swiftCore
UnsafeRawPointer.swift
UTFEncoding.swift
UTF8.swift
UTF8EncodingError.swift
UTF8Span.swift
UTF8SpanBits.swift
UTF8SpanComparisons.swift
UTF8SpanFundamentals.swift
UTF8SpanInternalHelpers.swift
UTF8SpanIterators.swift
UTF16.swift
UTF32.swift
Unicode.swift # ORDER DEPENDENCY: must follow new unicode support
Expand Down
7 changes: 7 additions & 0 deletions stdlib/public/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,13 @@ split_embedded_sources(
EMBEDDED UnsafeRawPointer.swift
EMBEDDED UTFEncoding.swift
EMBEDDED UTF8.swift
EMBEDDED UTF8EncodingError.swift
EMBEDDED UTF8Span.swift
EMBEDDED UTF8SpanBits.swift
EMBEDDED UTF8SpanComparisons.swift
EMBEDDED UTF8SpanFundamentals.swift
EMBEDDED UTF8SpanInternalHelpers.swift
EMBEDDED UTF8SpanIterators.swift
EMBEDDED UTF16.swift
EMBEDDED UTF32.swift
EMBEDDED Unicode.swift # ORDER DEPENDENCY: must follow new unicode support
Expand Down
9 changes: 9 additions & 0 deletions stdlib/public/core/GroupInfo.json
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,15 @@
"RawSpan.swift",
"Span.swift"
],
"UTF8Span": [
"UTF8EncodingError.swift",
"UTF8Span.swift",
"UTF8SpanBits.swift",
"UTF8SpanComparisons.swift",
"UTF8SpanFundamentals.swift",
"UTF8SpanInternalHelpers.swift",
"UTF8SpanIterators.swift"
],
"Protocols": [
"CompilerProtocols.swift",
"ShadowProtocols.swift"
Expand Down
104 changes: 0 additions & 104 deletions stdlib/public/core/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1112,108 +1112,4 @@ extension String {
}
}

extension _StringGutsSlice {
  /// Runs the NFC quick check for a single scalar, given the canonical
  /// combining class (ccc) of the scalar that preceded it.
  ///
  /// - Parameters:
  ///   - scalar: The scalar to examine.
  ///   - prevCCC: The ccc recorded for the previous scalar. It is overwritten
  ///     with this scalar's ccc only when the check passes; on failure it is
  ///     left untouched.
  /// - Returns: `false` if this scalar has a non-zero ccc that is smaller than
  ///   `prevCCC`, or if its normalization data does not report `isNFCQC`;
  ///   `true` otherwise.
  internal func _isScalarNFCQC(
    _ scalar: Unicode.Scalar,
    _ prevCCC: inout UInt8
  ) -> Bool {
    let properties = Unicode._NormData(scalar, fastUpperbound: 0x300)
    let ccc = properties.ccc

    // A non-zero combining class that is lower than its predecessor's fails
    // the check.
    let isOutOfOrder = ccc != 0 && prevCCC > ccc

    guard !isOutOfOrder, properties.isNFCQC else {
      return false
    }

    prevCCC = ccc
    return true
  }

  /// Feeds the UTF-8 code units of this slice's NFC form to `f`, one byte at
  /// a time.
  ///
  /// The slice's own bytes are forwarded unchanged whenever it is known to be
  /// NFC or every scalar passes the quick check; only otherwise is the slice
  /// run through `_internalNFC`.
  ///
  /// - Parameter f: Called once per code unit, in order. Any error it throws
  ///   is rethrown to the caller.
  internal func _withNFCCodeUnits(_ f: (UInt8) throws -> Void) rethrows {
    let slice = String(_guts)[range]

    // Fast path: storage already flagged as NFC (or ASCII) needs no work.
    if _fastPath(_guts.isNFC) {
      try slice.utf8.forEach(f)
      return
    }

    var passesQuickCheck = true
    var lastCCC: UInt8 = 0

    if _guts.isFastUTF8 {
      _fastNFCCheck(&passesQuickCheck, &lastCCC)

      // With fast UTF-8 available, read the bytes directly rather than going
      // through String's UTF8 view.
      if passesQuickCheck {
        try unsafe withFastUTF8 { bytes in
          for unsafe codeUnit in unsafe bytes {
            try f(codeUnit)
          }
        }

        return
      }
    } else {
      // Stops at the first scalar that fails the quick check.
      passesQuickCheck = slice.unicodeScalars.allSatisfy {
        _isScalarNFCQC($0, &lastCCC)
      }

      if passesQuickCheck {
        for codeUnit in slice.utf8 {
          try f(codeUnit)
        }

        return
      }
    }

    // Slow path: normalize scalar by scalar and emit each result's UTF-8.
    for normalized in slice.unicodeScalars._internalNFC {
      try normalized.withUTF8CodeUnits { encoded in
        for unsafe codeUnit in unsafe encoded {
          try f(codeUnit)
        }
      }
    }
  }

  /// Runs the NFC quick check over this slice's fast UTF-8 bytes.
  ///
  /// - Parameters:
  ///   - isNFCQC: Set to `false` as soon as a scalar fails the check. It is
  ///     never set to `true` here, so callers pass in `true` initially.
  ///   - prevCCC: Running ccc of the most recently accepted scalar; updated as
  ///     the scan advances.
  internal func _fastNFCCheck(_ isNFCQC: inout Bool, _ prevCCC: inout UInt8) {
    unsafe withFastUTF8 { utf8 in
      var offset = 0

      while offset < utf8.count {
        let leadByte = unsafe utf8[offset]

        // A lead byte below 0xCC encodes a scalar below 0x300, and everything
        // up to 0x300 is NFC already, so skip it without decoding.
        guard leadByte >= 0xCC else {
          // Below 0xC0 the scalar occupies a single byte (ASCII); otherwise
          // it is a two-byte sequence.
          offset &+= (leadByte < 0xC0 ? 1 : 2)

          // Skipped scalars are recorded with a ccc of 0.
          prevCCC = 0
          continue
        }

        let (decoded, width) = unsafe _decodeScalar(utf8, startingAt: offset)

        guard _isScalarNFCQC(decoded, &prevCCC) else {
          isNFCQC = false
          return
        }

        offset &+= width
      }
    }
  }
}
2 changes: 1 addition & 1 deletion stdlib/public/core/StringComparison.swift
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ internal func _stringCompareInternal(
}

@_effects(readonly)
private func _stringCompareFastUTF8(
internal func _stringCompareFastUTF8(
_ utf8Left: UnsafeBufferPointer<UInt8>,
_ utf8Right: UnsafeBufferPointer<UInt8>,
expecting: _StringComparisonResult,
Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/core/StringCreate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ extension String {
return unsafe (String._uncheckedFromUTF8(
input, asciiPreScanResult: extraInfo.isASCII
), false)
case .error(let initialRange):
case .error(_, let initialRange):
return unsafe (repairUTF8(input, firstKnownBrokenRange: initialRange), true)
}
}
Expand All @@ -139,7 +139,7 @@ extension String {
newIsASCII: info.isASCII
)
return result.asString
case .error(let initialRange):
case .error(_, let initialRange):
defer { _fixLifetime(result) }
//This could be optimized to use excess tail capacity
return unsafe repairUTF8(result.codeUnits, firstKnownBrokenRange: initialRange)
Expand Down
Loading