Skip to content

Commit 09f1ec7

Browse files
committed
[CGData][llvm-cgdata] Support for stable function map
This introduces a new cgdata format for stable function maps. The raw data is embedded in the __llvm_merge section at compile time. This data can be read and merged into an indexed cgdata file using the llvm-cgdata tool. Consequently, the tool can now handle outlined hash trees, stable function maps, or both, since the two kinds of data are orthogonal.
1 parent 060a23e commit 09f1ec7

21 files changed

+577
-87
lines changed

lld/test/MachO/cgdata-generate.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33

44
# RUN: rm -rf %t; split-file %s %t
55

6-
# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
6+
# Synthesize raw cgdata without the header (32 bytes) from the indexed cgdata.
77
# RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata
8-
# RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
8+
# RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt
99
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-template.s > %t/merge-1.s
1010
# RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata
11-
# RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
11+
# RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt
1212
# RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-template.s > %t/merge-2.s
1313

1414
# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o

llvm/docs/CommandGuide/llvm-cgdata.rst

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,13 @@ SYNOPSIS
1111
DESCRIPTION
1212
-----------
1313

14-
The :program:llvm-cgdata utility parses raw codegen data embedded
15-
in compiled binary files and merges them into a single .cgdata file.
16-
It can also inspect and manipulate .cgdata files.
17-
Currently, the tool supports saving and restoring outlined hash trees,
18-
enabling global function outlining across modules, allowing for more
19-
efficient function outlining in subsequent compilations.
20-
The design is extensible, allowing for the incorporation of additional
21-
codegen summaries and optimization techniques, such as global function
22-
merging, in the future.
14+
The :program:`llvm-cgdata` utility parses raw codegen data embedded in compiled
15+
binary files and merges them into a single .cgdata file. It can also inspect
16+
and manipulate .cgdata files. Currently, the tool supports saving and restoring
17+
outlined hash trees and stable function maps, allowing for more efficient
18+
function outlining and function merging across modules in subsequent
19+
compilations. The design is extensible, allowing for the incorporation of
20+
additional codegen summaries and optimization techniques.
2321

2422
COMMANDS
2523
--------

llvm/include/llvm/CGData/CodeGenData.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/Bitcode/BitcodeReader.h"
2020
#include "llvm/CGData/OutlinedHashTree.h"
2121
#include "llvm/CGData/OutlinedHashTreeRecord.h"
22+
#include "llvm/CGData/StableFunctionMapRecord.h"
2223
#include "llvm/IR/Module.h"
2324
#include "llvm/Object/ObjectFile.h"
2425
#include "llvm/Support/Caching.h"
@@ -41,7 +42,9 @@ enum class CGDataKind {
4142
Unknown = 0x0,
4243
// A function outlining info.
4344
FunctionOutlinedHashTree = 0x1,
44-
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
45+
// A function merging info.
46+
StableFunctionMergingMap = 0x2,
47+
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/StableFunctionMergingMap)
4548
};
4649

4750
const std::error_category &cgdata_category();
@@ -108,6 +111,8 @@ enum CGDataMode {
108111
class CodeGenData {
109112
/// Global outlined hash tree that has outlined hash sequences across modules.
110113
std::unique_ptr<OutlinedHashTree> PublishedHashTree;
114+
/// Global stable function map that has stable function info across modules.
115+
std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap;
111116

112117
/// This flag is set when -fcodegen-data-generate is passed.
113118
/// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
@@ -131,6 +136,9 @@ class CodeGenData {
131136
bool hasOutlinedHashTree() {
132137
return PublishedHashTree && !PublishedHashTree->empty();
133138
}
139+
bool hasStableFunctionMap() {
140+
return PublishedStableFunctionMap && !PublishedStableFunctionMap->empty();
141+
}
134142

135143
/// Returns the outlined hash tree. This can be globally used in a read-only
136144
/// manner.
@@ -147,6 +155,12 @@ class CodeGenData {
147155
// Ensure we disable emitCGData as we do not want to read and write both.
148156
EmitCGData = false;
149157
}
158+
void
159+
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
160+
PublishedStableFunctionMap = std::move(FunctionMap);
161+
// Ensure we disable emitCGData as we do not want to read and write both.
162+
EmitCGData = false;
163+
}
150164
};
151165

152166
namespace cgdata {
@@ -166,6 +180,11 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
166180
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
167181
}
168182

183+
inline void
184+
publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
185+
CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap));
186+
}
187+
169188
struct StreamCacheData {
170189
/// Backing buffer for serialized data stream.
171190
SmallVector<SmallString<0>> Outputs;
@@ -249,6 +268,8 @@ enum CGDataVersion {
249268
// Version 1 is the first version. This version supports the outlined
250269
// hash tree.
251270
Version1 = 1,
271+
// Version 2 supports the stable function merging map.
272+
Version2 = 2,
252273
CurrentVersion = CG_DATA_INDEX_VERSION
253274
};
254275
const uint64_t Version = CGDataVersion::CurrentVersion;
@@ -258,6 +279,7 @@ struct Header {
258279
uint32_t Version;
259280
uint32_t DataKind;
260281
uint64_t OutlinedHashTreeOffset;
282+
uint64_t StableFunctionMapOffset;
261283

262284
// New fields should only be added at the end to ensure that the size
263285
// computation is correct. The methods below need to be updated to ensure that

llvm/include/llvm/CGData/CodeGenData.inc

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,33 @@
2020
#define CG_DATA_DEFINED
2121
CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
2222
CG_DATA_OUTLINE_COFF, "__DATA,")
23+
CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
24+
CG_DATA_MERGE_COFF, "__DATA,")
2325

2426
#undef CG_DATA_SECT_ENTRY
2527
#endif
2628

2729
/* section name strings common to all targets other
2830
than WIN32 */
2931
#define CG_DATA_OUTLINE_COMMON __llvm_outline
32+
#define CG_DATA_MERGE_COMMON __llvm_merge
3033
/* Since cg data sections are not allocated, we don't need to
3134
* access them at runtime.
3235
*/
3336
#define CG_DATA_OUTLINE_COFF ".loutline"
37+
#define CG_DATA_MERGE_COFF ".lmerge"
3438

3539
#ifdef _WIN32
3640
/* Runtime section names and name strings. */
37-
#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF
41+
#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_OUTLINE_COFF
42+
#define CG_DATA_MERGE_SECT_NAME CG_DATA_MERGE_COFF
3843

3944
#else
4045
/* Runtime section names and name strings. */
41-
#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
46+
#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
47+
#define CG_DATA_MERGE_SECT_NAME CG_DATA_QUOTE(CG_DATA_MERGE_COMMON)
4248

4349
#endif
4450

4551
/* Indexed codegen data format version (start from 1). */
46-
#define CG_DATA_INDEX_VERSION 1
52+
#define CG_DATA_INDEX_VERSION 2

llvm/include/llvm/CGData/CodeGenDataReader.h

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "llvm/CGData/CodeGenData.h"
1717
#include "llvm/CGData/OutlinedHashTreeRecord.h"
18+
#include "llvm/CGData/StableFunctionMapRecord.h"
1819
#include "llvm/Support/LineIterator.h"
1920
#include "llvm/Support/VirtualFileSystem.h"
2021

@@ -36,10 +37,15 @@ class CodeGenDataReader {
3637
virtual CGDataKind getDataKind() const = 0;
3738
/// Return true if the data has an outlined hash tree.
3839
virtual bool hasOutlinedHashTree() const = 0;
40+
/// Return true if the data has a stable function map.
41+
virtual bool hasStableFunctionMap() const = 0;
3942
/// Return the outlined hash tree that is released from the reader.
4043
std::unique_ptr<OutlinedHashTree> releaseOutlinedHashTree() {
4144
return std::move(HashTreeRecord.HashTree);
4245
}
46+
std::unique_ptr<StableFunctionMap> releaseStableFunctionMap() {
47+
return std::move(FunctionMapRecord.FunctionMap);
48+
}
4349

4450
/// Factory method to create an appropriately typed reader for the given
4551
/// codegen data file path and file system.
@@ -56,15 +62,21 @@ class CodeGenDataReader {
5662
/// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
5763
/// Optionally, \p CombinedHash can be used to compute the combined hash of
5864
/// the merged data.
59-
static Error mergeFromObjectFile(const object::ObjectFile *Obj,
60-
OutlinedHashTreeRecord &GlobalOutlineRecord,
61-
stable_hash *CombinedHash = nullptr);
65+
static Error
66+
mergeFromObjectFile(const object::ObjectFile *Obj,
67+
OutlinedHashTreeRecord &GlobalOutlineRecord,
68+
StableFunctionMapRecord &GlobalFunctionMapRecord,
69+
stable_hash *CombinedHash = nullptr);
6270

6371
protected:
6472
/// The outlined hash tree that has been read. When it's released by
6573
/// releaseOutlinedHashTree(), it's no longer valid.
6674
OutlinedHashTreeRecord HashTreeRecord;
6775

76+
/// The stable function map that has been read. When it's released by
77+
/// releaseStableFunctionMap(), it's no longer valid.
78+
StableFunctionMapRecord FunctionMapRecord;
79+
6880
/// Set the current error and return same.
6981
Error error(cgdata_error Err, const std::string &ErrMsg = "") {
7082
LastError = Err;
@@ -115,6 +127,11 @@ class IndexedCodeGenDataReader : public CodeGenDataReader {
115127
return Header.DataKind &
116128
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
117129
}
130+
/// Return true if the header indicates the data has a stable function map.
131+
bool hasStableFunctionMap() const override {
132+
return Header.DataKind &
133+
static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
134+
}
118135
};
119136

120137
/// This format is a simple text format that's suitable for test data.
@@ -150,6 +167,12 @@ class TextCodeGenDataReader : public CodeGenDataReader {
150167
return static_cast<uint32_t>(DataKind) &
151168
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
152169
}
170+
/// Return true if the header indicates the data has a stable function map.
171+
/// This does not mean that the data is still available.
172+
bool hasStableFunctionMap() const override {
173+
return static_cast<uint32_t>(DataKind) &
174+
static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
175+
}
153176
};
154177

155178
} // end namespace llvm

llvm/include/llvm/CGData/CodeGenDataWriter.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "llvm/CGData/CodeGenData.h"
1717
#include "llvm/CGData/OutlinedHashTreeRecord.h"
18+
#include "llvm/CGData/StableFunctionMapRecord.h"
1819
#include "llvm/Support/EndianStream.h"
1920
#include "llvm/Support/Error.h"
2021

@@ -57,16 +58,22 @@ class CodeGenDataWriter {
5758
/// The outlined hash tree to be written.
5859
OutlinedHashTreeRecord HashTreeRecord;
5960

61+
/// The stable function map to be written.
62+
StableFunctionMapRecord FunctionMapRecord;
63+
6064
/// A bit mask describing the kind of the codegen data.
6165
CGDataKind DataKind = CGDataKind::Unknown;
6266

6367
public:
6468
CodeGenDataWriter() = default;
6569
~CodeGenDataWriter() = default;
6670

67-
/// Add the outlined hash tree record. The input Record is released.
71+
/// Add the outlined hash tree record. The input hash tree is released.
6872
void addRecord(OutlinedHashTreeRecord &Record);
6973

74+
/// Add the stable function map record. The input function map is released.
75+
void addRecord(StableFunctionMapRecord &Record);
76+
7077
/// Write the codegen data to \c OS
7178
Error write(raw_fd_ostream &OS);
7279

@@ -81,11 +88,19 @@ class CodeGenDataWriter {
8188
return static_cast<uint32_t>(DataKind) &
8289
static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
8390
}
91+
/// Return true if the header indicates the data has a stable function map.
92+
bool hasStableFunctionMap() const {
93+
return static_cast<uint32_t>(DataKind) &
94+
static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap);
95+
}
8496

8597
private:
8698
/// The offset of the outlined hash tree in the file.
8799
uint64_t OutlinedHashTreeOffset;
88100

101+
/// The offset of the stable function map in the file.
102+
uint64_t StableFunctionMapOffset;
103+
89104
/// Write the codegen data header to \c COS
90105
Error writeHeader(CGDataOStream &COS);
91106

llvm/lib/CGData/CodeGenData.cpp

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/Bitcode/BitcodeWriter.h"
1515
#include "llvm/CGData/CodeGenDataReader.h"
1616
#include "llvm/CGData/OutlinedHashTreeRecord.h"
17+
#include "llvm/CGData/StableFunctionMapRecord.h"
1718
#include "llvm/Object/ObjectFile.h"
1819
#include "llvm/Support/Caching.h"
1920
#include "llvm/Support/CommandLine.h"
@@ -163,6 +164,8 @@ CodeGenData &CodeGenData::getInstance() {
163164
auto Reader = ReaderOrErr->get();
164165
if (Reader->hasOutlinedHashTree())
165166
Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
167+
if (Reader->hasStableFunctionMap())
168+
Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap());
166169
}
167170
});
168171
return *(Instance.get());
@@ -185,18 +188,14 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
185188
return make_error<CGDataError>(cgdata_error::unsupported_version);
186189
H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
187190

188-
switch (H.Version) {
189-
// When a new field is added to the header add a case statement here to
190-
// compute the size as offset of the new field + size of the new field. This
191-
// relies on the field being added to the end of the list.
192-
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1,
193-
"Please update the size computation below if a new field has "
194-
"been added to the header, if not add a case statement to "
195-
"fall through to the latest version.");
196-
case 1ull:
197-
H.OutlinedHashTreeOffset =
191+
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version2,
192+
"Please update the offset computation below if a new field has "
193+
"been added to the header.");
194+
H.OutlinedHashTreeOffset =
195+
endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
196+
if (H.Version >= 2)
197+
H.StableFunctionMapOffset =
198198
endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
199-
}
200199

201200
return H;
202201
}
@@ -257,6 +256,7 @@ std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
257256

258257
Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
259258
OutlinedHashTreeRecord GlobalOutlineRecord;
259+
StableFunctionMapRecord GlobalStableFunctionMapRecord;
260260
stable_hash CombinedHash = 0;
261261
for (auto File : ObjFiles) {
262262
if (File.empty())
@@ -270,12 +270,18 @@ Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
270270

271271
std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
272272
if (auto E = CodeGenDataReader::mergeFromObjectFile(
273-
Obj.get(), GlobalOutlineRecord, &CombinedHash))
273+
Obj.get(), GlobalOutlineRecord, GlobalStableFunctionMapRecord,
274+
&CombinedHash))
274275
return E;
275276
}
276277

278+
GlobalStableFunctionMapRecord.finalize();
279+
277280
if (!GlobalOutlineRecord.empty())
278281
cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree));
282+
if (!GlobalStableFunctionMapRecord.empty())
283+
cgdata::publishStableFunctionMap(
284+
std::move(GlobalStableFunctionMapRecord.FunctionMap));
279285

280286
return CombinedHash;
281287
}

0 commit comments

Comments
 (0)