Skip to content

Commit 523ffcb

Browse files
authored
Port UIDNAHook to FoundationInternationalization (#746)
1 parent 944aead commit 523ffcb

File tree

4 files changed

+250
-2
lines changed

4 files changed

+250
-2
lines changed

Sources/FoundationEssentials/String/String+Comparison.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
extension UTF8.CodeUnit {
13+
package extension UTF8.CodeUnit {
1414
static let newline: Self = 0x0A
1515
static let carriageReturn: Self = 0x0D
1616

Sources/FoundationInternationalization/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ add_library(FoundationInternationalization
1717
Date+ICU.swift
1818
Duration+Utils.swift
1919
RangeExpression.swift
20-
TimeInterval+Utils.swift)
20+
TimeInterval+Utils.swift
21+
URLParser+ICU.swift)
2122

2223
add_subdirectory(Calendar)
2324
add_subdirectory(Formatting)
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2024 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
#if canImport(FoundationEssentials)
13+
import FoundationEssentials
14+
#endif
15+
16+
internal import _FoundationICU
17+
18+
/// ICU-backed `UIDNAHook` that transcodes host names with the `uidna_*` family
/// of functions (`uidna_nameToASCII*` for encoding, `uidna_nameToUnicode*` for
/// decoding).
internal final class UIDNAHookICU: UIDNAHook {
    // `Sendable` notes: `UIDNA` from ICU is thread safe.
    /// Wraps the opaque ICU `UIDNA` handle so it can be stored in a static property.
    struct UIDNAPointer : @unchecked Sendable {
        init(_ ptr: OpaquePointer?) { self.idnaTranscoder = ptr }
        var idnaTranscoder: OpaquePointer?
    }

    /// Returns `true` when the raw ICU status code `x` is not an error,
    /// i.e. it is less than or equal to `U_ZERO_ERROR`.
    private static func U_SUCCESS(_ x: Int32) -> Bool {
        return x <= U_ZERO_ERROR.rawValue
    }

    /// Shared transcoder opened once via `uidna_openUTS46`, or `nil` if ICU
    /// reported a failure while opening it.
    private static let idnaTranscoder: UIDNAPointer? = {
        var status = U_ZERO_ERROR
        let options = UInt32(
            UIDNA_CHECK_BIDI |
            UIDNA_CHECK_CONTEXTJ |
            UIDNA_NONTRANSITIONAL_TO_UNICODE |
            UIDNA_NONTRANSITIONAL_TO_ASCII
        )
        let encoder = uidna_openUTS46(options, &status)
        guard U_SUCCESS(status.rawValue) else {
            return nil
        }
        return UIDNAPointer(encoder)
    }()

    /// Decides whether the IDNA processing `errors` bitmask is acceptable.
    ///
    /// - Parameters:
    ///   - errors: The `errors` field reported in `UIDNAInfo` after transcoding.
    ///   - encodeToASCII: `true` when encoding to ASCII, in which case no error
    ///     bit is tolerated; `false` when decoding to Unicode, in which case the
    ///     label-length, domain-length, and hyphen-placement bits are tolerated.
    /// - Returns: `true` if every bit set in `errors` is tolerated.
    private static func shouldAllow(_ errors: UInt32, encodeToASCII: Bool) -> Bool {
        let allowedErrors: UInt32
        if encodeToASCII {
            allowedErrors = 0
        } else {
            allowedErrors = UInt32(
                UIDNA_ERROR_EMPTY_LABEL |
                UIDNA_ERROR_LABEL_TOO_LONG |
                UIDNA_ERROR_DOMAIN_NAME_TOO_LONG |
                UIDNA_ERROR_LEADING_HYPHEN |
                UIDNA_ERROR_TRAILING_HYPHEN |
                UIDNA_ERROR_HYPHEN_3_4
            )
        }
        return errors & ~allowedErrors == 0
    }

    /// Type of `uidna_nameToASCII` and `uidna_nameToUnicode` functions
    private typealias TranscodingFunction<T> = (OpaquePointer?, UnsafePointer<T>?, Int32, UnsafeMutablePointer<T>?, Int32, UnsafeMutablePointer<UIDNAInfo>?, UnsafeMutablePointer<UErrorCode>?) -> Int32

    /// Runs `transcode` over `hostBuffer` into a temporary output buffer and
    /// builds a string from the converted code units.
    ///
    /// - Parameters:
    ///   - hostBuffer: The host's code units. Inputs longer than 2048 elements
    ///     are rejected.
    ///   - transcode: The ICU function performing the conversion.
    ///   - allowErrors: Predicate over the `UIDNAInfo.errors` bitmask; the
    ///     conversion is rejected when it returns `false`.
    ///   - createString: Builds the result from the output pointer and the
    ///     number of converted code units. The pointer is only valid for the
    ///     duration of the call.
    /// - Returns: The transcoded host, or `nil` if the input is too long or has
    ///   no storage, the transcoder could not be opened, ICU reported an error,
    ///   the processing errors are not allowed, or nothing was converted.
    private static func IDNACodedHost<T: FixedWidthInteger>(
        hostBuffer: UnsafeBufferPointer<T>,
        transcode: TranscodingFunction<T>,
        allowErrors: (UInt32) -> Bool,
        createString: (UnsafeMutablePointer<T>, Int) -> String?
    ) -> String? {
        let maxHostBufferLength = 2048
        guard hostBuffer.count <= maxHostBufferLength else {
            return nil
        }

        guard let transcoder = idnaTranscoder else {
            return nil
        }

        return withUnsafeTemporaryAllocation(of: T.self, capacity: maxHostBufferLength) { outBuffer -> String? in
            // `baseAddress` may be nil for an empty buffer. Such input can never
            // produce a non-empty result, so fail gracefully instead of trapping.
            guard let hostBufferPtr = hostBuffer.baseAddress,
                  let outBufferPtr = outBuffer.baseAddress else {
                return nil
            }

            var processingDetails = UIDNAInfo(
                size: Int16(MemoryLayout<UIDNAInfo>.size),
                isTransitionalDifferent: 0,
                reservedB3: 0,
                errors: 0,
                reservedI2: 0,
                reservedI3: 0
            )
            var error = U_ZERO_ERROR

            let charsConverted = transcode(
                transcoder.idnaTranscoder,
                hostBufferPtr,
                Int32(hostBuffer.count),
                outBufferPtr,
                Int32(outBuffer.count),
                &processingDetails,
                &error
            )

            guard U_SUCCESS(error.rawValue),
                  allowErrors(processingDetails.errors),
                  charsConverted > 0 else {
                return nil
            }
            return createString(outBufferPtr, Int(charsConverted))
        }
    }

    /// Transcodes a UTF-8 host.
    ///
    /// When the converted output has the same length as the input and differs
    /// from it only where the lowercased input byte equals the output byte, the
    /// original spelling is returned instead of the converted one.
    private static func IDNACodedHostUTF8(_ utf8Buffer: UnsafeBufferPointer<UInt8>, encodeToASCII: Bool) -> String? {
        let transcode = encodeToASCII ? uidna_nameToASCII_UTF8 : uidna_nameToUnicodeUTF8
        return utf8Buffer.withMemoryRebound(to: CChar.self) { charBuffer in
            return IDNACodedHost(
                hostBuffer: charBuffer,
                transcode: transcode,
                allowErrors: { errors in
                    shouldAllow(errors, encodeToASCII: encodeToASCII)
                },
                createString: { ptr, count in
                    // Do all reads inside the rebinding closure: the rebound
                    // buffer is only valid for the duration of that call and
                    // must not escape it.
                    UnsafeBufferPointer(start: ptr, count: count).withMemoryRebound(to: UInt8.self) { outBuffer -> String? in
                        // `elementsEqual` also yields false when the lengths differ.
                        let hostsAreEqual = utf8Buffer.elementsEqual(outBuffer) { original, converted in
                            original == converted || original._lowercased == converted
                        }
                        return String._tryFromUTF8(hostsAreEqual ? utf8Buffer : outBuffer)
                    }
                }
            )
        }
    }

    /// Transcodes a UTF-16 host.
    ///
    /// When the converted output has the same length as the input and differs
    /// from it only where an input unit below 128 lowercases to the output unit,
    /// the original spelling is returned instead of the converted one.
    private static func IDNACodedHostUTF16(_ utf16Buffer: UnsafeBufferPointer<UInt16>, encodeToASCII: Bool) -> String? {
        let transcode = encodeToASCII ? uidna_nameToASCII : uidna_nameToUnicode
        return IDNACodedHost(
            hostBuffer: utf16Buffer,
            transcode: transcode,
            allowErrors: { errors in
                shouldAllow(errors, encodeToASCII: encodeToASCII)
            },
            createString: { ptr, count in
                let outBuffer = UnsafeBufferPointer(start: ptr, count: count)
                // `elementsEqual` also yields false when the lengths differ.
                let hostsAreEqual = utf16Buffer.elementsEqual(outBuffer) { original, converted in
                    if original == converted {
                        return true
                    }
                    // Only units below 128 are narrowed to `UInt8` for lowercasing.
                    guard original < 128 else {
                        return false
                    }
                    return UInt8(original)._lowercased == converted
                }
                return String(_utf16: hostsAreEqual ? utf16Buffer : outBuffer)
            }
        )
    }

    /// Transcodes `host`, preferring storage that is already contiguous.
    ///
    /// Tries the host's contiguous UTF-8 storage first, then (framework builds
    /// only) its fast UTF-16 character contents, and finally falls back to
    /// copying the host into a `String` and using that string's UTF-8 storage.
    private static func IDNACodedHost(_ host: some StringProtocol, encodeToASCII: Bool) -> String? {
        let fastResult = host.utf8.withContiguousStorageIfAvailable {
            IDNACodedHostUTF8($0, encodeToASCII: encodeToASCII)
        }
        // A non-nil outer optional means the fast path ran; its result (even
        // `nil`) is final.
        if let fastResult {
            return fastResult
        }
        #if FOUNDATION_FRAMEWORK
        if let fastCharacters = host._ns._fastCharacterContents() {
            let charsBuffer = UnsafeBufferPointer(start: fastCharacters, count: host._ns.length)
            return IDNACodedHostUTF16(charsBuffer, encodeToASCII: encodeToASCII)
        }
        #endif
        var hostString = String(host)
        return hostString.withUTF8 {
            IDNACodedHostUTF8($0, encodeToASCII: encodeToASCII)
        }
    }

    /// Encodes `host` to its ASCII form, or returns `nil` if it cannot be encoded.
    static func encode(_ host: some StringProtocol) -> String? {
        return IDNACodedHost(host, encodeToASCII: true)
    }

    /// Decodes `host` to its Unicode form, or returns `nil` if it cannot be decoded.
    static func decode(_ host: some StringProtocol) -> String? {
        return IDNACodedHost(host, encodeToASCII: false)
    }

}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2024 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#if FOUNDATION_FRAMEWORK
14+
@testable import Foundation
15+
#else
16+
@testable import FoundationEssentials
17+
@testable import FoundationInternationalization
18+
#endif // FOUNDATION_FRAMEWORK
19+
20+
#if canImport(TestSupport)
21+
import TestSupport
22+
#endif
23+
24+
/// Checks that `URL` IDNA-encodes non-ASCII hosts and percent-encodes the path.
final class URLUIDNATests: XCTestCase {
    /// Each case parses a URL with a non-ASCII host and verifies both the full
    /// encoded string and the encoded host component.
    func testURLHostUIDNAEncoding() {
        let expectations: [(input: String, encodedURL: String, encodedHost: String)] = [
            (
                input: "https://i❤️tacos.ws/🏳️‍🌈/冰淇淋",
                encodedURL: "https://xn--itacos-i50d.ws/%F0%9F%8F%B3%EF%B8%8F%E2%80%8D%F0%9F%8C%88/%E5%86%B0%E6%B7%87%E6%B7%8B",
                encodedHost: "xn--itacos-i50d.ws"
            ),
            (
                input: "http://見.香港/热狗/🌭",
                encodedURL: "http://xn--nw2a.xn--j6w193g/%E7%83%AD%E7%8B%97/%F0%9F%8C%AD",
                encodedHost: "xn--nw2a.xn--j6w193g"
            ),
        ]

        for expectation in expectations {
            let parsed = URL(string: expectation.input)
            XCTAssertEqual(parsed?.absoluteString, expectation.encodedURL)
            XCTAssertEqual(parsed?.host(percentEncoded: false), expectation.encodedHost)
        }
    }
}

0 commit comments

Comments
 (0)