Skip to content

Commit 5fb7692

Browse files
committed
WIP: UTF8Span
wip: closer to proposal wip wip: more implementation wip: add back in comparison functions wip: more iterator testing Detangle grapheme breaking guts from string guts Further untangling wip wip: use internal grapheme breaking Call internal impl directly wip refactoring wip: might be broken wip: update to lifetime changes wip: wire up nfc quick check wip: validation error rename
1 parent d86e1c7 commit 5fb7692

17 files changed

+3036
-398
lines changed

stdlib/public/core/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,13 @@ split_embedded_sources(
216216
EMBEDDED UTF8.swift
217217
EMBEDDED UTF16.swift
218218
EMBEDDED UTF32.swift
219+
EMBEDDED UTF8Span.swift
220+
EMBEDDED UTF8SpanBits.swift
221+
EMBEDDED UTF8SpanComparisons.swift
222+
EMBEDDED UTF8SpanFundamentals.swift
223+
EMBEDDED UTF8SpanInternalHelpers.swift
224+
EMBEDDED UTF8SpanIterators.swift
225+
EMBEDDED UTF8EncodingError.swift
219226
EMBEDDED Unicode.swift # ORDER DEPENDENCY: must follow new unicode support
220227
EMBEDDED StringGraphemeBreaking.swift # ORDER DEPENDENCY: Must follow UTF16.swift
221228
EMBEDDED ValidUTF8Buffer.swift

stdlib/public/core/GroupInfo.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,15 @@
205205
"RawSpan.swift",
206206
"Span.swift"
207207
],
208+
"UTF8Span": [
209+
"UTF8EncodingError.swift",
210+
"UTF8Span.swift",
211+
"UTF8SpanBits.swift",
212+
"UTF8SpanComparisons.swift",
213+
"UTF8SpanFundamentals.swift",
214+
"UTF8SpanInternalHelpers.swift",
215+
"UTF8SpanIterators.swift"
216+
],
208217
"Protocols": [
209218
"CompilerProtocols.swift",
210219
"ShadowProtocols.swift"

stdlib/public/core/String.swift

Lines changed: 0 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,108 +1112,4 @@ extension String {
11121112
}
11131113
}
11141114

1115-
extension _StringGutsSlice {
1116-
internal func _isScalarNFCQC(
1117-
_ scalar: Unicode.Scalar,
1118-
_ prevCCC: inout UInt8
1119-
) -> Bool {
1120-
let normData = Unicode._NormData(scalar, fastUpperbound: 0x300)
11211115

1122-
if prevCCC > normData.ccc, normData.ccc != 0 {
1123-
return false
1124-
}
1125-
1126-
if !normData.isNFCQC {
1127-
return false
1128-
}
1129-
1130-
prevCCC = normData.ccc
1131-
return true
1132-
}
1133-
1134-
internal func _withNFCCodeUnits(_ f: (UInt8) throws -> Void) rethrows {
1135-
let substring = String(_guts)[range]
1136-
// Fast path: If we're already NFC (or ASCII), then we don't need to do
1137-
// anything at all.
1138-
if _fastPath(_guts.isNFC) {
1139-
try substring.utf8.forEach(f)
1140-
return
1141-
}
1142-
1143-
var isNFCQC = true
1144-
var prevCCC: UInt8 = 0
1145-
1146-
if _guts.isFastUTF8 {
1147-
_fastNFCCheck(&isNFCQC, &prevCCC)
1148-
1149-
// Because we have access to the fastUTF8, we can go through that instead
1150-
// of accessing the UTF8 view on String.
1151-
if isNFCQC {
1152-
try unsafe withFastUTF8 {
1153-
for unsafe byte in unsafe $0 {
1154-
try f(byte)
1155-
}
1156-
}
1157-
1158-
return
1159-
}
1160-
} else {
1161-
for scalar in substring.unicodeScalars {
1162-
if !_isScalarNFCQC(scalar, &prevCCC) {
1163-
isNFCQC = false
1164-
break
1165-
}
1166-
}
1167-
1168-
if isNFCQC {
1169-
for byte in substring.utf8 {
1170-
try f(byte)
1171-
}
1172-
1173-
return
1174-
}
1175-
}
1176-
1177-
for scalar in substring.unicodeScalars._internalNFC {
1178-
try scalar.withUTF8CodeUnits {
1179-
for unsafe byte in unsafe $0 {
1180-
try f(byte)
1181-
}
1182-
}
1183-
}
1184-
}
1185-
1186-
internal func _fastNFCCheck(_ isNFCQC: inout Bool, _ prevCCC: inout UInt8) {
1187-
unsafe withFastUTF8 { utf8 in
1188-
var position = 0
1189-
1190-
while position < utf8.count {
1191-
// If our first byte is less than 0xCC, then it means we're under the
1192-
// 0x300 scalar value, and every scalar below 0x300 is NFC already.
1193-
if unsafe utf8[position] < 0xCC {
1194-
// If our first byte is less than 0xC0, then (in valid UTF-8) it is ASCII
1195-
// and only takes up a single byte.
1196-
if unsafe utf8[position] < 0xC0 {
1197-
position &+= 1
1198-
} else {
1199-
// Otherwise, this is a 2 byte < 0x300 sequence.
1200-
position &+= 2
1201-
}
1202-
// Every scalar below 0x300 (ASCII included) has a ccc of 0.
1203-
prevCCC = 0
1204-
1205-
continue
1206-
}
1207-
1208-
let (scalar, len) = unsafe _decodeScalar(utf8, startingAt: position)
1209-
1210-
if !_isScalarNFCQC(scalar, &prevCCC) {
1211-
isNFCQC = false
1212-
return
1213-
}
1214-
1215-
position &+= len
1216-
}
1217-
}
1218-
}
1219-
}

stdlib/public/core/StringComparison.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ internal func _stringCompareInternal(
9797
}
9898

9999
@_effects(readonly)
100-
private func _stringCompareFastUTF8(
100+
internal func _stringCompareFastUTF8(
101101
_ utf8Left: UnsafeBufferPointer<UInt8>,
102102
_ utf8Right: UnsafeBufferPointer<UInt8>,
103103
expecting: _StringComparisonResult,

0 commit comments

Comments
 (0)