Skip to content

Commit 83dce6e

Browse files
authored
Merge pull request #236 from CodaFi/perfmon
Improve Deserialization Performance for Bitstream Files
2 parents 10021cf + eed8368 commit 83dce6e

File tree

5 files changed

+124
-132
lines changed

5 files changed

+124
-132
lines changed

Sources/TSCUtility/Bitstream.swift

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,29 +18,50 @@ public struct Bitcode {
1818
public let blockInfo: [UInt64: BlockInfo]
1919
}
2020

21+
/// A non-owning view of a bitcode element.
2122
public enum BitcodeElement {
2223
public struct Block {
2324
public var id: UInt64
2425
public var elements: [BitcodeElement]
2526
}
2627

28+
/// A record element.
29+
///
30+
/// - Warning: A `Record` element's fields and payload are only valid for
31+
/// the duration of the `visit` call that provides them. To persist
32+
/// a record, always make a copy of it.
2733
public struct Record {
2834
public enum Payload {
2935
case none
3036
case array([UInt64])
3137
case char6String(String)
32-
case blob(Data)
38+
case blob(ArraySlice<UInt8>)
3339
}
3440

3541
public var id: UInt64
36-
public var fields: [UInt64]
42+
public var fields: UnsafeBufferPointer<UInt64>
3743
public var payload: Payload
3844
}
3945

4046
case block(Block)
4147
case record(Record)
4248
}
4349

50+
extension BitcodeElement.Record.Payload: CustomStringConvertible {
51+
public var description: String {
52+
switch self {
53+
case .none:
54+
return "none"
55+
case .array(let vals):
56+
return "array(\(vals))"
57+
case .char6String(let s):
58+
return "char6String(\(s))"
59+
case .blob(let s):
60+
return "blob(\(s.count) bytes)"
61+
}
62+
}
63+
}
64+
4465
public struct BlockInfo {
4566
public var name: String = ""
4667
public var recordNames: [UInt64: String] = [:]

Sources/TSCUtility/BitstreamReader.swift

Lines changed: 50 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -12,35 +12,8 @@ import Foundation
1212
import TSCBasic
1313

1414
extension Bitcode {
15-
/// Parse a bitstream from data.
16-
@available(*, deprecated, message: "Use Bitcode.init(bytes:) instead")
17-
public init(data: Data) throws {
18-
precondition(data.count > 4)
19-
try self.init(bytes: ByteString(data))
20-
}
21-
22-
public init(bytes: ByteString) throws {
23-
precondition(bytes.count > 4)
24-
var reader = BitstreamReader(buffer: bytes)
25-
let signature = try reader.readSignature()
26-
var visitor = CollectingVisitor()
27-
try reader.readBlock(id: BitstreamReader.fakeTopLevelBlockID,
28-
abbrevWidth: 2,
29-
abbrevInfo: [],
30-
visitor: &visitor)
31-
self.init(signature: signature,
32-
elements: visitor.finalizeTopLevelElements(),
33-
blockInfo: reader.blockInfo)
34-
}
35-
3615
/// Traverse a bitstream using the specified `visitor`, which will receive
3716
/// callbacks when blocks and records are encountered.
38-
@available(*, deprecated, message: "Use Bitcode.read(bytes:using:) instead")
39-
public static func read<Visitor: BitstreamVisitor>(stream data: Data, using visitor: inout Visitor) throws {
40-
precondition(data.count > 4)
41-
try Self.read(bytes: ByteString(data), using: &visitor)
42-
}
43-
4417
public static func read<Visitor: BitstreamVisitor>(bytes: ByteString, using visitor: inout Visitor) throws {
4518
precondition(bytes.count > 4)
4619
var reader = BitstreamReader(buffer: bytes)
@@ -52,36 +25,6 @@ extension Bitcode {
5225
}
5326
}
5427

55-
/// A basic visitor that collects all the blocks and records in a stream.
56-
private struct CollectingVisitor: BitstreamVisitor {
57-
var stack: [(UInt64, [BitcodeElement])] = [(BitstreamReader.fakeTopLevelBlockID, [])]
58-
59-
func validate(signature: Bitcode.Signature) throws {}
60-
61-
mutating func shouldEnterBlock(id: UInt64) throws -> Bool {
62-
stack.append((id, []))
63-
return true
64-
}
65-
66-
mutating func didExitBlock() throws {
67-
guard let (id, elements) = stack.popLast() else {
68-
fatalError("Unbalanced calls to shouldEnterBlock/didExitBlock")
69-
}
70-
71-
let block = BitcodeElement.Block(id: id, elements: elements)
72-
stack[stack.endIndex-1].1.append(.block(block))
73-
}
74-
75-
mutating func visit(record: BitcodeElement.Record) throws {
76-
stack[stack.endIndex-1].1.append(.record(record))
77-
}
78-
79-
func finalizeTopLevelElements() -> [BitcodeElement] {
80-
assert(stack.count == 1)
81-
return stack[0].1
82-
}
83-
}
84-
8528
private extension Bits.Cursor {
8629
enum BitcodeError: Swift.Error {
8730
case vbrOverflow
@@ -161,6 +104,7 @@ private struct BitstreamReader {
161104
guard numOps > 0 else { throw Error.invalidAbbrev }
162105

163106
var operands: [Bitstream.Abbreviation.Operand] = []
107+
operands.reserveCapacity(numOps)
164108
for i in 0..<numOps {
165109
operands.append(try readAbbrevOp())
166110

@@ -204,15 +148,29 @@ private struct BitstreamReader {
204148
}
205149
}
206150

207-
mutating func readAbbreviatedRecord(_ abbrev: Bitstream.Abbreviation) throws -> BitcodeElement.Record {
151+
/// Computes a non-owning view of a `BitcodeElement.Record` that is valid for
152+
/// the lifetime of the call to `body`.
153+
///
154+
/// - Warning: If decoding the record throws, `body` is never called; errors thrown by `body` itself are propagated to the caller.
155+
mutating func withAbbreviatedRecord(
156+
_ abbrev: Bitstream.Abbreviation,
157+
body: (BitcodeElement.Record) throws -> Void
158+
) throws {
208159
let code = try readSingleAbbreviatedRecordOperand(abbrev.operands.first!)
209160

210161
let lastOperand = abbrev.operands.last!
211162
let lastRegularOperandIndex: Int = abbrev.operands.endIndex - (lastOperand.isPayload ? 1 : 0)
212163

213-
var fields = [UInt64]()
214-
for op in abbrev.operands[1..<lastRegularOperandIndex] {
215-
fields.append(try readSingleAbbreviatedRecordOperand(op))
164+
// Safety: `lastRegularOperandIndex` is always at least 1. An abbreviation
165+
// is required by the format to contain at least one operand. If that last
166+
// operand is a payload (and thus we subtracted one from the total number of
167+
// operands above), then that must mean it is either a trailing array
168+
// or trailing blob. Both of these are preceded by their length field.
169+
let fields = UnsafeMutableBufferPointer<UInt64>.allocate(capacity: lastRegularOperandIndex - 1)
170+
defer { fields.deallocate() }
171+
172+
for (idx, op) in abbrev.operands[1..<lastRegularOperandIndex].enumerated() {
173+
fields[idx] = try readSingleAbbreviatedRecordOperand(op)
216174
}
217175

218176
let payload: BitcodeElement.Record.Payload
@@ -222,26 +180,42 @@ private struct BitstreamReader {
222180
switch lastOperand {
223181
case .array(let element):
224182
let length = try cursor.readVBR(6)
225-
var elements = [UInt64]()
226-
for _ in 0..<length {
227-
elements.append(try readSingleAbbreviatedRecordOperand(element))
228-
}
229183
if case .char6 = element {
230-
payload = .char6String(String(String.UnicodeScalarView(elements.map { UnicodeScalar(UInt8($0)) })))
184+
// FIXME: Once the minimum deployment target bumps to macOS 11, use
185+
// the more ergonomic stdlib API everywhere.
186+
if #available(macOS 11.0, *) {
187+
payload = try .char6String(String(unsafeUninitializedCapacity: Int(length)) { buffer in
188+
for i in 0..<Int(length) {
189+
buffer[i] = try UInt8(readSingleAbbreviatedRecordOperand(element))
190+
}
191+
return Int(length)
192+
})
193+
} else {
194+
let buffer = UnsafeMutableBufferPointer<UInt8>.allocate(capacity: Int(length))
195+
defer { buffer.deallocate() }
196+
for i in 0..<Int(length) {
197+
buffer[i] = try UInt8(readSingleAbbreviatedRecordOperand(element))
198+
}
199+
payload = .char6String(String(decoding: buffer, as: UTF8.self))
200+
}
231201
} else {
202+
var elements = [UInt64]()
203+
for _ in 0..<length {
204+
elements.append(try readSingleAbbreviatedRecordOperand(element))
205+
}
232206
payload = .array(elements)
233207
}
234208
case .blob:
235209
let length = Int(try cursor.readVBR(6))
236210
try cursor.advance(toBitAlignment: 32)
237-
payload = .blob(try Data(cursor.read(bytes: length)))
211+
payload = .blob(try cursor.read(bytes: length))
238212
try cursor.advance(toBitAlignment: 32)
239213
default:
240214
fatalError()
241215
}
242216
}
243217

244-
return .init(id: code, fields: fields, payload: payload)
218+
return try body(.init(id: code, fields: UnsafeBufferPointer(fields), payload: payload))
245219
}
246220

247221
mutating func readBlockInfoBlock(abbrevWidth: Int) throws {
@@ -341,17 +315,20 @@ private struct BitstreamReader {
341315
case Bitstream.AbbreviationID.unabbreviatedRecord.rawValue:
342316
let code = try cursor.readVBR(6)
343317
let numOps = try cursor.readVBR(6)
344-
var operands = [UInt64]()
345-
for _ in 0..<numOps {
346-
operands.append(try cursor.readVBR(6))
318+
let operands = UnsafeMutableBufferPointer<UInt64>.allocate(capacity: Int(numOps))
319+
defer { operands.deallocate() }
320+
for i in 0..<Int(numOps) {
321+
operands[i] = try cursor.readVBR(6)
347322
}
348-
try visitor.visit(record: .init(id: code, fields: operands, payload: .none))
323+
try visitor.visit(record: .init(id: code, fields: UnsafeBufferPointer(operands), payload: .none))
349324

350325
case let abbrevID:
351326
guard Int(abbrevID) - 4 < abbrevInfo.count else {
352327
throw Error.noSuchAbbrev(blockID: id, abbrevID: Int(abbrevID))
353328
}
354-
try visitor.visit(record: try readAbbreviatedRecord(abbrevInfo[Int(abbrevID) - 4]))
329+
try withAbbreviatedRecord(abbrevInfo[Int(abbrevID) - 4]) { record in
330+
try visitor.visit(record: record)
331+
}
355332
}
356333
}
357334

Sources/TSCUtility/BitstreamWriter.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ extension BitstreamWriter {
338338

339339
fileprivate init() {
340340
self.values = []
341+
self.values.reserveCapacity(8)
341342
}
342343

343344
fileprivate init<CodeType>(recordID: CodeType)
@@ -367,6 +368,7 @@ extension BitstreamWriter {
367368
}
368369

369370
public mutating func append(_ string: String) {
371+
self.values.reserveCapacity(self.values.capacity + string.utf8.count)
370372
for byte in string.utf8 {
371373
values.append(UInt32(byte))
372374
}

Sources/TSCUtility/SerializedDiagnostics.swift

Lines changed: 46 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,6 @@ public struct SerializedDiagnostics {
4343
/// Serialized diagnostics.
4444
public var diagnostics: [Diagnostic]
4545

46-
@available(*, deprecated, message: "Use SerializedDiagnostics.init(bytes:) instead")
47-
public init(data: Data) throws {
48-
var reader = Reader()
49-
try Bitcode.read(stream: data, using: &reader)
50-
guard let version = reader.versionNumber else { throw Error.noMetadataBlock }
51-
self.versionNumber = version
52-
self.diagnostics = reader.diagnostics
53-
}
54-
5546
public init(bytes: ByteString) throws {
5647
var reader = Reader()
5748
try Bitcode.read(bytes: bytes, using: &reader)
@@ -88,7 +79,7 @@ extension SerializedDiagnostics {
8879
/// Fix-its associated with the diagnostic.
8980
public var fixIts: [FixIt]
9081

91-
fileprivate init(records: [BitcodeElement.Record],
82+
fileprivate init(records: [SerializedDiagnostics.OwnedRecord],
9283
filenameMap: inout [UInt64: String],
9384
flagMap: inout [UInt64: String],
9485
categoryMap: inout [UInt64: String]) throws {
@@ -107,7 +98,7 @@ extension SerializedDiagnostics {
10798
case .blob(let diagnosticBlob) = record.payload
10899
else { throw Error.malformedRecord }
109100

110-
text = String(data: diagnosticBlob, encoding: .utf8)
101+
text = String(decoding: diagnosticBlob, as: UTF8.self)
111102
level = Level(rawValue: record.fields[0])
112103
location = SourceLocation(fields: record.fields[1...4],
113104
filenameMap: filenameMap)
@@ -125,38 +116,38 @@ extension SerializedDiagnostics {
125116
}
126117
case .flag:
127118
guard record.fields.count == 2,
128-
case .blob(let flagBlob) = record.payload,
129-
let flagText = String(data: flagBlob, encoding: .utf8)
119+
case .blob(let flagBlob) = record.payload
130120
else { throw Error.malformedRecord }
131121

122+
let flagText = String(decoding: flagBlob, as: UTF8.self)
132123
let diagnosticID = record.fields[0]
133124
flagMap[diagnosticID] = flagText
134125

135126
case .category:
136127
guard record.fields.count == 2,
137-
case .blob(let categoryBlob) = record.payload,
138-
let categoryText = String(data: categoryBlob, encoding: .utf8)
128+
case .blob(let categoryBlob) = record.payload
139129
else { throw Error.malformedRecord }
140130

131+
let categoryText = String(decoding: categoryBlob, as: UTF8.self)
141132
let categoryID = record.fields[0]
142133
categoryMap[categoryID] = categoryText
143134

144135
case .filename:
145136
guard record.fields.count == 4,
146-
case .blob(let filenameBlob) = record.payload,
147-
let filenameText = String(data: filenameBlob, encoding: .utf8)
137+
case .blob(let filenameBlob) = record.payload
148138
else { throw Error.malformedRecord }
149139

140+
let filenameText = String(decoding: filenameBlob, as: UTF8.self)
150141
let filenameID = record.fields[0]
151142
// record.fields[1] and record.fields[2] are no longer used.
152143
filenameMap[filenameID] = filenameText
153144

154145
case .fixit:
155146
guard record.fields.count == 9,
156-
case .blob(let fixItBlob) = record.payload,
157-
let fixItText = String(data: fixItBlob, encoding: .utf8)
147+
case .blob(let fixItBlob) = record.payload
158148
else { throw Error.malformedRecord }
159149

150+
let fixItText = String(decoding: fixItBlob, as: UTF8.self)
160151
if let start = SourceLocation(fields: record.fields[0...3],
161152
filenameMap: filenameMap),
162153
let end = SourceLocation(fields: record.fields[4...7],
@@ -223,7 +214,7 @@ extension SerializedDiagnostics {
223214
var flagMap = [UInt64: String]()
224215
var categoryMap = [UInt64: String]()
225216

226-
var currentDiagnosticRecords: [BitcodeElement.Record] = []
217+
var currentDiagnosticRecords: [OwnedRecord] = []
227218

228219
func validate(signature: Bitcode.Signature) throws {
229220
guard signature == .init(string: "DIAG") else { throw Error.badMagic }
@@ -256,10 +247,44 @@ extension SerializedDiagnostics {
256247
}
257248
versionNumber = Int(record.fields[0])
258249
case .diagnostic:
259-
currentDiagnosticRecords.append(record)
250+
currentDiagnosticRecords.append(SerializedDiagnostics.OwnedRecord(record))
260251
case nil:
261252
throw Error.unexpectedTopLevelRecord
262253
}
263254
}
264255
}
265256
}
257+
258+
extension SerializedDiagnostics {
259+
struct OwnedRecord {
260+
public enum Payload {
261+
case none
262+
case array([UInt64])
263+
case char6String(String)
264+
case blob([UInt8])
265+
266+
init(_ payload: BitcodeElement.Record.Payload) {
267+
switch payload {
268+
case .none:
269+
self = .none
270+
case .array(let a):
271+
self = .array(Array(a))
272+
case .char6String(let s):
273+
self = .char6String(s)
274+
case .blob(let b):
275+
self = .blob(Array(b))
276+
}
277+
}
278+
}
279+
280+
public var id: UInt64
281+
public var fields: [UInt64]
282+
public var payload: Payload
283+
284+
init(_ record: BitcodeElement.Record) {
285+
self.id = record.id
286+
self.fields = Array(record.fields)
287+
self.payload = Payload(record.payload)
288+
}
289+
}
290+
}

0 commit comments

Comments
 (0)