Skip to content

Commit 9ab21d3

Browse files
authored
Merge pull request swiftlang#36623 from xwu/better-atoi
[stdlib][SR-7556] Re-implement string-to-integer parsing
2 parents 717a132 + cef11cd commit 9ab21d3

File tree

2 files changed

+176
-95
lines changed

2 files changed

+176
-95
lines changed

stdlib/public/core/IntegerParsing.swift

Lines changed: 159 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,175 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2014 - 2021 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information
99
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
/// Converts a run of unsigned ASCII digits into a fixed-width integer.
///
/// The buffer must contain digits only; any sign character has to be consumed
/// by the caller beforehand and communicated through `isNegative`.
///
/// - Parameters:
///   - codeUnits: The ASCII bytes to interpret as digits. An empty buffer
///     yields `nil`.
///   - radix: The base of the number. Checked by an internal invariant to be
///     in `2...36`.
///   - isNegative: When `true`, each digit is subtracted from the running
///     value instead of added, so the value is built up as a negative number.
/// - Returns: The parsed value, or `nil` if the buffer is empty, contains a
///   byte that is not a valid digit for `radix`, or the value does not fit in
///   `Result`.
@_alwaysEmitIntoClient
internal func _parseIntegerDigits<Result: FixedWidthInteger>(
  ascii codeUnits: UnsafeBufferPointer<UInt8>, radix: Int, isNegative: Bool
) -> Result? {
  _internalInvariant(radix >= 2 && radix <= 36)
  guard _fastPath(!codeUnits.isEmpty) else { return nil }

  // ASCII code points of "0", "A" and "a".
  let zero: UInt8 = 48
  let upperA: UInt8 = 65
  let lowerA: UInt8 = 97

  // How many decimal digits and how many letters are valid for this radix.
  // For radices up to 10 no letter is accepted, which makes both letter
  // ranges below empty.
  let decimalCount: UInt8 = radix <= 10 ? UInt8(truncatingIfNeeded: radix) : 10
  let letterCount: UInt8 =
    radix <= 10 ? 0 : UInt8(truncatingIfNeeded: radix &- 10)

  // Exclusive upper bounds of the three accepted byte ranges.
  let decimalEnd = zero &+ decimalCount
  let upperEnd = upperA &+ letterCount
  let lowerEnd = lowerA &+ letterCount

  let base = Result(truncatingIfNeeded: radix)
  var accumulator: Result = 0
  for byte in codeUnits {
    let value: Result
    if _fastPath(byte >= zero && byte < decimalEnd) {
      value = Result(truncatingIfNeeded: byte &- zero)
    } else if _fastPath(byte >= upperA && byte < upperEnd) {
      value = Result(truncatingIfNeeded: byte &- upperA &+ 10)
    } else if _fastPath(byte >= lowerA && byte < lowerEnd) {
      value = Result(truncatingIfNeeded: byte &- lowerA &+ 10)
    } else {
      // Not a digit in this radix.
      return nil
    }

    // Shift the running value up by one digit, then fold in the new digit.
    // Subtracting for negative input lets `Result.min` be reached without
    // first forming its magnitude, which would not be representable.
    let (scaled, scaleOverflowed) =
      accumulator.multipliedReportingOverflow(by: base)
    let (combined, combineOverflowed) = isNegative
      ? scaled.subtractingReportingOverflow(value)
      : scaled.addingReportingOverflow(value)
    guard _fastPath(!scaleOverflowed && !combineOverflowed) else { return nil }
    accumulator = combined
  }
  return accumulator
}
58+
59+
/// Parses an optionally signed ASCII integer from a non-empty buffer.
///
/// A single leading `+` or `-` is stripped and the remaining bytes are handed
/// to `_parseIntegerDigits`; without a sign the whole buffer is parsed as a
/// non-negative number.
///
/// - Parameters:
///   - codeUnits: The ASCII bytes to parse. Must not be empty (checked by an
///     internal invariant).
///   - radix: The base of the number, forwarded unchanged.
/// - Returns: Whatever `_parseIntegerDigits` returns for the digit portion.
@_alwaysEmitIntoClient
internal func _parseInteger<Result: FixedWidthInteger>(
  ascii codeUnits: UnsafeBufferPointer<UInt8>, radix: Int
) -> Result? {
  _internalInvariant(!codeUnits.isEmpty)

  switch codeUnits[0] {
  case 45: // ASCII "-"
    let digits = UnsafeBufferPointer(rebasing: codeUnits[1...])
    return _parseIntegerDigits(ascii: digits, radix: radix, isNegative: true)
  case 43: // ASCII "+"
    let digits = UnsafeBufferPointer(rebasing: codeUnits[1...])
    return _parseIntegerDigits(ascii: digits, radix: radix, isNegative: false)
  default:
    // No sign: every byte is expected to be a digit.
    return _parseIntegerDigits(
      ascii: codeUnits, radix: radix, isNegative: false)
  }
}
81+
82+
/// Parses an integer from any `StringProtocol` value by first copying it into
/// a `String` and then parsing that string's UTF-8 bytes.
///
/// - Parameters:
///   - text: The text to parse.
///   - radix: The base of the number, forwarded unchanged.
/// - Returns: The result of the buffer-based `_parseInteger` overload applied
///   to the UTF-8 bytes of the copy.
@_alwaysEmitIntoClient
@inline(never)
internal func _parseInteger<S: StringProtocol, Result: FixedWidthInteger>(
  ascii text: S, radix: Int
) -> Result? {
  // `withUTF8` is mutating, hence the mutable local copy.
  var copy = String(text)
  let parsed: Result? = copy.withUTF8 { utf8 -> Result? in
    return _parseInteger(ascii: utf8, radix: radix)
  }
  return parsed
}
90+
91+
extension FixedWidthInteger {
  /// Creates a new integer value from the given string and radix.
  ///
  /// `text` may start with a single plus or minus sign (`+` or `-`), which
  /// must be followed by one or more digits (`0-9`) or letters (`a-z` or
  /// `A-Z`). Letters are matched case insensitively.
  ///
  ///     let x = Int("123")
  ///     // x == 123
  ///
  ///     let y = Int("-123", radix: 8)
  ///     // y == -83
  ///     let w = Int("+123", radix: 8)
  ///     // w == +83
  ///
  ///     let z = Int("07b", radix: 16)
  ///     // z == 123
  ///
  /// The result is `nil` if `text` is in an invalid format, if it contains
  /// characters that are out of bounds for the given `radix`, or if the value
  /// it denotes in the given `radix` is not representable. For example, the
  /// following conversions result in `nil`:
  ///
  ///     Int(" 100")                       // Includes whitespace
  ///     Int("21-50")                      // Invalid format
  ///     Int("ff6600")                     // Characters out of bounds
  ///     Int("zzzzzzzzzzzzz", radix: 36)   // Out of range
  ///
  /// - Parameters:
  ///   - text: The ASCII representation of a number in the radix passed as
  ///     `radix`.
  ///   - radix: The radix, or base, to use for converting `text` to an integer
  ///     value. `radix` must be in the range `2...36`. The default is 10.
  @inlinable
  @inline(__always)
  public init?<S: StringProtocol>(_ text: S, radix: Int = 10) {
    _precondition((2...36).contains(radix), "Radix not in range 2...36")
    guard _fastPath(!text.isEmpty) else { return nil }

    // Outer optional: was contiguous UTF-8 storage available?
    // Inner optional: did parsing that storage succeed?
    let contiguousAttempt: Self?? =
      text.utf8.withContiguousStorageIfAvailable { buffer -> Self? in
        return _parseInteger(ascii: buffer, radix: radix)
      }

    let parsed: Self?
    if let fromContiguous = contiguousAttempt {
      // The bytes were parsed in place; a failure here is final.
      parsed = fromContiguous
    } else {
      // No contiguous storage: fall back to parsing a `String` copy.
      parsed = _parseInteger(ascii: text, radix: radix)
    }

    guard let value = parsed else { return nil }
    self = value
  }

  /// Creates a new integer value from the given string.
  ///
  /// `description` may start with a single plus or minus sign (`+` or `-`),
  /// which must be followed by one or more decimal digits (`0-9`).
  ///
  ///     let x = Int("123")
  ///     // x == 123
  ///
  /// The result is `nil` if `description` is in an invalid format, or if the
  /// value it denotes in base 10 is not representable. For example, the
  /// following conversions result in `nil`:
  ///
  ///     Int(" 100")                       // Includes whitespace
  ///     Int("21-50")                      // Invalid format
  ///     Int("ff6600")                     // Characters out of bounds
  ///     Int("10000000000000000000000000") // Out of range
  ///
  /// - Parameter description: The ASCII representation of a number.
  @inlinable
  @inline(__always)
  public init?(_ description: String) {
    // Decimal parsing is the general initializer with a fixed radix.
    self.init(description, radix: 10)
  }
}
161+
162+
//===----------------------------------------------------------------------===//
163+
// Old entry points preserved for ABI compatibility.
164+
//===----------------------------------------------------------------------===//
165+
13166
/// Converts an ASCII scalar to its UTF-16 code unit.
///
/// Intended to be called with a literal, as in `_ascii16("x")`. An internal
/// invariant checks that the scalar's value does not exceed `0x7F`.
@usableFromInline // Previously '@inlinable'.
internal func _ascii16(_ c: Unicode.Scalar) -> UTF16.CodeUnit {
  let scalarValue = c.value
  _internalInvariant(scalarValue >= 0 && scalarValue <= 0x7F, "not ASCII")
  return UTF16.CodeUnit(scalarValue)
}
19172

20-
@inlinable
21-
@inline(__always)
173+
@usableFromInline // Previously '@inlinable @inline(__always)'.
22174
internal func _asciiDigit<CodeUnit: UnsignedInteger, Result: BinaryInteger>(
23175
codeUnit u_: CodeUnit, radix: Result
24176
) -> Result? {
@@ -36,8 +188,7 @@ internal func _asciiDigit<CodeUnit: UnsignedInteger, Result: BinaryInteger>(
36188
return Result(truncatingIfNeeded: d)
37189
}
38190

39-
@inlinable
40-
@inline(__always)
191+
@usableFromInline // Previously '@inlinable @inline(__always)'.
41192
internal func _parseUnsignedASCII<
42193
Rest: IteratorProtocol, Result: FixedWidthInteger
43194
>(
@@ -67,13 +218,10 @@ where Rest.Element: UnsignedInteger {
67218
return result
68219
}
69220

70-
//
71-
// TODO (TODO: JIRA): This needs to be completely rewritten. It's about 20KB of
221+
// This function has been superseded because it is about 20KB of previously
72222
// always-inline code, most of which are MOV instructions.
73-
//
74223

75-
@inlinable
76-
@inline(__always)
224+
@usableFromInline // Previously '@inlinable @inline(__always)'.
77225
internal func _parseASCII<
78226
CodeUnits: IteratorProtocol, Result: FixedWidthInteger
79227
>(
@@ -113,88 +261,4 @@ extension FixedWidthInteger {
113261
where CodeUnits.Element: UnsignedInteger {
114262
return _parseASCII(codeUnits: &codeUnits, radix: radix)
115263
}
116-
117-
/// Creates a new integer value from the given string and radix.
118-
///
119-
/// The string passed as `text` may begin with a plus or minus sign character
120-
/// (`+` or `-`), followed by one or more numeric digits (`0-9`) or letters
121-
/// (`a-z` or `A-Z`). Parsing of the string is case insensitive.
122-
///
123-
/// let x = Int("123")
124-
/// // x == 123
125-
///
126-
/// let y = Int("-123", radix: 8)
127-
/// // y == -83
128-
/// let y = Int("+123", radix: 8)
129-
/// // y == +83
130-
///
131-
/// let z = Int("07b", radix: 16)
132-
/// // z == 123
133-
///
134-
/// If `text` is in an invalid format or contains characters that are out of
135-
/// bounds for the given `radix`, or if the value it denotes in the given
136-
/// `radix` is not representable, the result is `nil`. For example, the
137-
/// following conversions result in `nil`:
138-
///
139-
/// Int(" 100") // Includes whitespace
140-
/// Int("21-50") // Invalid format
141-
/// Int("ff6600") // Characters out of bounds
142-
/// Int("zzzzzzzzzzzzz", radix: 36) // Out of range
143-
///
144-
/// - Parameters:
145-
/// - text: The ASCII representation of a number in the radix passed as
146-
/// `radix`.
147-
/// - radix: The radix, or base, to use for converting `text` to an integer
148-
/// value. `radix` must be in the range `2...36`. The default is 10.
149-
@inlinable // @specializable
150-
@_semantics("optimize.sil.specialize.generic.partial.never")
151-
public init?<S: StringProtocol>(_ text: S, radix: Int = 10) {
152-
_precondition(2...36 ~= radix, "Radix not in range 2...36")
153-
154-
if let str = text as? String, str._guts.isFastUTF8 {
155-
guard let ret = str._guts.withFastUTF8 ({ utf8 -> Self? in
156-
var iter = utf8.makeIterator()
157-
return _parseASCII(codeUnits: &iter, radix: Self(radix))
158-
}) else {
159-
return nil
160-
}
161-
self = ret
162-
return
163-
}
164-
165-
// TODO(String performance): We can provide fast paths for common radices,
166-
// native UTF-8 storage, etc.
167-
168-
var iter = text.utf8.makeIterator()
169-
guard let ret = Self._parseASCIISlowPath(
170-
codeUnits: &iter, radix: Self(radix)
171-
) else { return nil }
172-
173-
self = ret
174-
}
175-
176-
/// Creates a new integer value from the given string.
177-
///
178-
/// The string passed as `description` may begin with a plus or minus sign
179-
/// character (`+` or `-`), followed by one or more numeric digits (`0-9`).
180-
///
181-
/// let x = Int("123")
182-
/// // x == 123
183-
///
184-
/// If `description` is in an invalid format, or if the value it denotes in
185-
/// base 10 is not representable, the result is `nil`. For example, the
186-
/// following conversions result in `nil`:
187-
///
188-
/// Int(" 100") // Includes whitespace
189-
/// Int("21-50") // Invalid format
190-
/// Int("ff6600") // Characters out of bounds
191-
/// Int("10000000000000000000000000") // Out of range
192-
///
193-
/// - Parameter description: The ASCII representation of a number.
194-
@inlinable
195-
@_semantics("optimize.sil.specialize.generic.partial.never")
196-
@inline(__always)
197-
public init?(_ description: String) {
198-
self.init(description, radix: 10)
199-
}
200264
}

test/stdlib/NSSlowString.swift

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,23 @@ tests.test("Iterator") {
5757
expectEqualSequence(opaque.utf8.reversed(), native.utf8.reversed())
5858
}
5959

60+
// Parsing an opaque (non-native) string must give the same value as parsing
// the equivalent native string, for several radices and for both a signed
// and an unsigned result type.
tests.test("String-to-integer parsing") {
  let native = "1234"
  let opaque = NSSlowString(string: "1234") as String
  let radices = [16, 15, 10, 8, 5]

  for radix in radices {
    expectEqual(Int(opaque, radix: radix)!, Int(native, radix: radix)!)
  }

  for radix in radices {
    expectEqual(UInt16(opaque, radix: radix)!, UInt16(native, radix: radix)!)
  }
}
76+
6077
tests.test("Unicode 9 grapheme breaking")
6178
.xfail(.osxMinor(10, 9, reason: "Mac OS X 10.9 has an old version of ICU"))
6279
.xfail(.iOSMajor(7, reason: "iOS 7 has an old version of ICU"))

0 commit comments

Comments
 (0)