Skip to content

Commit 45f6036

Browse files
[StaticDataLayout][PGO]Implement reader and writer change for data access profiles (#139997)
#138170 introduces classes to operate on data access profiles. This change supports reading and writing `DataAccessProfData` in the indexed format of MemProf (v4) as well as in its text (YAML) format. For indexed format: * InstrProfWriter owns (by `std::unique_ptr<DataAccessProfData>`) the data access profiles, and gives a non-owned copy when it calls `writeMemProf`. * MemProf v4 header has a new `uint64_t` to record the byte offset of data access profiles. This `uint64_t` field is zero if the data access profile is not set (nullptr). * MemProfReader reads the offset from the v4 header and de-serializes in-memory bytes into class `DataAccessProfData`. For textual format: * MemProfYAML.h adds the mapping for the DAP class, and makes DAP optional for both read and write. 099a0fa (by @snehasish) introduces v4 which contains CalleeGuids in CallSiteInfo, and this change changes the v4 format in place with data access profiles. The current plan is to bump the version and enable v4 profiles with both features, assuming waiting for this change won't delay the callsite change too long. --------- Co-authored-by: Kazu Hirata <[email protected]>
1 parent e2a8855 commit 45f6036

File tree

14 files changed

+313
-32
lines changed

14 files changed

+313
-32
lines changed

llvm/include/llvm/ProfileData/DataAccessProf.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,8 @@
1717
#ifndef LLVM_PROFILEDATA_DATAACCESSPROF_H_
1818
#define LLVM_PROFILEDATA_DATAACCESSPROF_H_
1919

20-
#include "llvm/ADT/DenseMap.h"
2120
#include "llvm/ADT/DenseMapInfoVariant.h"
2221
#include "llvm/ADT/MapVector.h"
23-
#include "llvm/ADT/STLExtras.h"
2422
#include "llvm/ADT/SetVector.h"
2523
#include "llvm/ADT/SmallVector.h"
2624
#include "llvm/ADT/StringRef.h"
@@ -35,12 +33,15 @@
3533

3634
namespace llvm {
3735

38-
namespace data_access_prof {
36+
namespace memprof {
3937

4038
/// The location of data in the source code. Used by profile lookup API.
4139
struct SourceLocation {
4240
SourceLocation(StringRef FileNameRef, uint32_t Line)
4341
: FileName(FileNameRef.str()), Line(Line) {}
42+
43+
// Empty constructor is used in yaml conversion.
44+
SourceLocation() {}
4445
/// The filename where the data is located.
4546
std::string FileName;
4647
/// The line number in the source code.
@@ -53,6 +54,8 @@ namespace internal {
5354
// which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData`
5455
// to represent data locations internally.
5556
struct SourceLocationRef {
57+
SourceLocationRef(StringRef FileNameRef, uint32_t Line)
58+
: FileName(FileNameRef), Line(Line) {}
5659
// The filename where the data is located.
5760
StringRef FileName;
5861
// The line number in the source code.
@@ -100,18 +103,21 @@ using SymbolHandle = std::variant<std::string, uint64_t>;
100103
/// The data access profiles for a symbol.
101104
struct DataAccessProfRecord {
102105
public:
103-
DataAccessProfRecord(SymbolHandleRef SymHandleRef,
104-
ArrayRef<internal::SourceLocationRef> LocRefs) {
106+
DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount,
107+
ArrayRef<internal::SourceLocationRef> LocRefs)
108+
: AccessCount(AccessCount) {
105109
if (std::holds_alternative<StringRef>(SymHandleRef)) {
106110
SymHandle = std::get<StringRef>(SymHandleRef).str();
107111
} else
108112
SymHandle = std::get<uint64_t>(SymHandleRef);
109113

110114
for (auto Loc : LocRefs)
111-
Locations.push_back(SourceLocation(Loc.FileName, Loc.Line));
115+
Locations.emplace_back(Loc.FileName, Loc.Line);
112116
}
117+
// Empty constructor is used in yaml conversion.
118+
DataAccessProfRecord() {}
113119
SymbolHandle SymHandle;
114-
120+
uint64_t AccessCount;
115121
// The locations of data in the source code. Optional.
116122
SmallVector<SourceLocation> Locations;
117123
};
@@ -208,7 +214,7 @@ class DataAccessProfData {
208214
llvm::SetVector<StringRef> KnownColdSymbols;
209215
};
210216

211-
} // namespace data_access_prof
217+
} // namespace memprof
212218
} // namespace llvm
213219

214220
#endif // LLVM_PROFILEDATA_DATAACCESSPROF_H_

llvm/include/llvm/ProfileData/IndexedMemProfData.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,13 @@
1515
#ifndef LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H
1616
#define LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H
1717

18+
#include "llvm/ProfileData/DataAccessProf.h"
1819
#include "llvm/ProfileData/InstrProf.h"
1920
#include "llvm/ProfileData/MemProf.h"
2021

22+
#include <functional>
23+
#include <optional>
24+
2125
namespace llvm {
2226
namespace memprof {
2327
struct IndexedMemProfData {
@@ -82,8 +86,10 @@ struct IndexedMemProfData {
8286
} // namespace memprof
8387

8488
// Write the MemProf data to OS.
85-
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
86-
memprof::IndexedVersion MemProfVersionRequested,
87-
bool MemProfFullSchema);
89+
Error writeMemProf(
90+
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
91+
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
92+
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData);
93+
8894
} // namespace llvm
8995
#endif

llvm/include/llvm/ProfileData/InstrProfReader.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/ADT/StringRef.h"
1919
#include "llvm/IR/ProfileSummary.h"
2020
#include "llvm/Object/BuildID.h"
21+
#include "llvm/ProfileData/DataAccessProf.h"
2122
#include "llvm/ProfileData/InstrProf.h"
2223
#include "llvm/ProfileData/InstrProfCorrelator.h"
2324
#include "llvm/ProfileData/MemProf.h"
@@ -703,10 +704,13 @@ class IndexedMemProfReader {
703704
const unsigned char *CallStackBase = nullptr;
704705
// The number of elements in the radix tree array.
705706
unsigned RadixTreeSize = 0;
707+
/// The data access profiles, deserialized from binary data.
708+
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
706709

707710
Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
708711
Error deserializeRadixTreeBased(const unsigned char *Start,
709-
const unsigned char *Ptr);
712+
const unsigned char *Ptr,
713+
memprof::IndexedVersion Version);
710714

711715
public:
712716
IndexedMemProfReader() = default;

llvm/include/llvm/ProfileData/InstrProfWriter.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/StringMap.h"
2020
#include "llvm/IR/GlobalValue.h"
2121
#include "llvm/Object/BuildID.h"
22+
#include "llvm/ProfileData/DataAccessProf.h"
2223
#include "llvm/ProfileData/IndexedMemProfData.h"
2324
#include "llvm/ProfileData/InstrProf.h"
2425
#include "llvm/Support/Error.h"
@@ -81,6 +82,8 @@ class InstrProfWriter {
8182
// Whether to generated random memprof hotness for testing.
8283
bool MemprofGenerateRandomHotness;
8384

85+
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
86+
8487
public:
8588
// For memprof testing, random hotness can be assigned to the contexts if
8689
// MemprofGenerateRandomHotness is enabled. The random seed can be either
@@ -122,6 +125,9 @@ class InstrProfWriter {
122125
// Add a binary id to the binary ids list.
123126
void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
124127

128+
void addDataAccessProfData(
129+
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfile);
130+
125131
/// Merge existing function counts from the given writer.
126132
void mergeRecordsFromWriter(InstrProfWriter &&IPW,
127133
function_ref<void(Error)> Warn);

llvm/include/llvm/ProfileData/MemProfReader.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,20 @@ class YAMLMemProfReader final : public MemProfReader {
229229
create(std::unique_ptr<MemoryBuffer> Buffer);
230230

231231
void parse(StringRef YAMLData);
232+
233+
std::unique_ptr<memprof::DataAccessProfData> takeDataAccessProfData() {
234+
return std::move(DataAccessProfileData);
235+
}
236+
237+
private:
238+
// Called by `parse` to set data access profiles after parsing them from Yaml
239+
// files.
240+
void
241+
setDataAccessProfileData(std::unique_ptr<memprof::DataAccessProfData> Data) {
242+
DataAccessProfileData = std::move(Data);
243+
}
244+
245+
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
232246
};
233247
} // namespace memprof
234248
} // namespace llvm

llvm/include/llvm/ProfileData/MemProfYAML.h

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define LLVM_PROFILEDATA_MEMPROFYAML_H_
33

44
#include "llvm/ADT/SmallVector.h"
5+
#include "llvm/ProfileData/DataAccessProf.h"
56
#include "llvm/ProfileData/MemProf.h"
67
#include "llvm/Support/Format.h"
78
#include "llvm/Support/YAMLTraits.h"
@@ -20,9 +21,24 @@ struct GUIDMemProfRecordPair {
2021
MemProfRecord Record;
2122
};
2223

24+
// Helper struct to yamlify memprof::DataAccessProfData. The struct
25+
// members use owned strings. This is for simplicity and assumes that most real
26+
// world use cases do look-ups and regression test scale is small.
27+
struct YamlDataAccessProfData {
28+
std::vector<memprof::DataAccessProfRecord> Records;
29+
std::vector<uint64_t> KnownColdStrHashes;
30+
std::vector<std::string> KnownColdSymbols;
31+
32+
bool isEmpty() const {
33+
return Records.empty() && KnownColdStrHashes.empty() &&
34+
KnownColdSymbols.empty();
35+
}
36+
};
37+
2338
// The top-level data structure, only used with YAML for now.
2439
struct AllMemProfData {
2540
std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
41+
YamlDataAccessProfData YamlifiedDataAccessProfiles;
2642
};
2743
} // namespace memprof
2844

@@ -206,9 +222,52 @@ template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
206222
}
207223
};
208224

225+
template <> struct MappingTraits<memprof::SourceLocation> {
226+
static void mapping(IO &Io, memprof::SourceLocation &Loc) {
227+
Io.mapOptional("FileName", Loc.FileName);
228+
Io.mapOptional("Line", Loc.Line);
229+
}
230+
};
231+
232+
template <> struct MappingTraits<memprof::DataAccessProfRecord> {
233+
static void mapping(IO &Io, memprof::DataAccessProfRecord &Rec) {
234+
if (Io.outputting()) {
235+
if (std::holds_alternative<std::string>(Rec.SymHandle)) {
236+
Io.mapOptional("Symbol", std::get<std::string>(Rec.SymHandle));
237+
} else {
238+
Io.mapOptional("Hash", std::get<uint64_t>(Rec.SymHandle));
239+
}
240+
} else {
241+
std::string SymName;
242+
uint64_t Hash = 0;
243+
Io.mapOptional("Symbol", SymName);
244+
Io.mapOptional("Hash", Hash);
245+
if (!SymName.empty()) {
246+
Rec.SymHandle = SymName;
247+
} else {
248+
Rec.SymHandle = Hash;
249+
}
250+
}
251+
252+
Io.mapOptional("Locations", Rec.Locations);
253+
}
254+
};
255+
256+
template <> struct MappingTraits<memprof::YamlDataAccessProfData> {
257+
static void mapping(IO &Io, memprof::YamlDataAccessProfData &Data) {
258+
Io.mapOptional("SampledRecords", Data.Records);
259+
Io.mapOptional("KnownColdSymbols", Data.KnownColdSymbols);
260+
Io.mapOptional("KnownColdStrHashes", Data.KnownColdStrHashes);
261+
}
262+
};
263+
209264
template <> struct MappingTraits<memprof::AllMemProfData> {
210265
static void mapping(IO &Io, memprof::AllMemProfData &Data) {
211266
Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
267+
// Map data access profiles if reading input, or if writing output &&
268+
// the struct is populated.
269+
if (!Io.outputting() || !Data.YamlifiedDataAccessProfiles.isEmpty())
270+
Io.mapOptional("DataAccessProfiles", Data.YamlifiedDataAccessProfiles);
212271
}
213272
};
214273

@@ -234,5 +293,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
234293
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
235294
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
236295
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
296+
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::DataAccessProfRecord)
297+
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::SourceLocation)
237298

238299
#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_

llvm/lib/ProfileData/DataAccessProf.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#include <sys/types.h>
1212

1313
namespace llvm {
14-
namespace data_access_prof {
14+
namespace memprof {
1515

1616
// If `Map` has an entry keyed by `Str`, returns the entry iterator. Otherwise,
1717
// creates an owned copy of `Str`, adds a map entry for it and returns the
@@ -48,7 +48,8 @@ DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const {
4848

4949
auto It = Records.find(Key);
5050
if (It != Records.end()) {
51-
return DataAccessProfRecord(Key, It->second.Locations);
51+
return DataAccessProfRecord(Key, It->second.AccessCount,
52+
It->second.Locations);
5253
}
5354

5455
return std::nullopt;
@@ -261,5 +262,5 @@ Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) {
261262
}
262263
return Error::success();
263264
}
264-
} // namespace data_access_prof
265+
} // namespace memprof
265266
} // namespace llvm

0 commit comments

Comments
 (0)