Skip to content

Commit fd96c01

Browse files
snehasish authored and frederik-h committed
[MemProf] Extend CallSite information to include potential callees. (llvm#130441)
* Added YAML traits for `CallSiteInfo`
* Updated the `MemProfReader` to pass `Frames` instead of the entire `CallSiteInfo`
* Updated test cases to use `testing::Field`
* Add YAML sequence traits for `CallSiteInfo` in MemProfYAML
* Also extend `IndexedMemProfRecord`
* XFAIL the MemProfYaml round trip test until we update the profile format

For now we only read and write the additional information from the YAML format. The YAML round trip test will be enabled when the serialized format is updated.
1 parent a00cbca commit fd96c01

File tree

12 files changed

+151
-60
lines changed

12 files changed

+151
-60
lines changed

llvm/include/llvm/ProfileData/MemProf.h

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,28 @@ using CallStackId = uint64_t;
342342
// A type representing the index into the call stack array.
343343
using LinearCallStackId = uint32_t;
344344

345+
// Holds call site information with indexed frame contents.
346+
struct IndexedCallSiteInfo {
347+
// The call stack ID for this call site
348+
CallStackId CSId = 0;
349+
// The GUIDs of the callees at this call site
350+
SmallVector<GlobalValue::GUID, 1> CalleeGuids;
351+
352+
IndexedCallSiteInfo() = default;
353+
IndexedCallSiteInfo(CallStackId CSId) : CSId(CSId) {}
354+
IndexedCallSiteInfo(CallStackId CSId,
355+
SmallVector<GlobalValue::GUID, 1> CalleeGuids)
356+
: CSId(CSId), CalleeGuids(std::move(CalleeGuids)) {}
357+
358+
bool operator==(const IndexedCallSiteInfo &Other) const {
359+
return CSId == Other.CSId && CalleeGuids == Other.CalleeGuids;
360+
}
361+
362+
bool operator!=(const IndexedCallSiteInfo &Other) const {
363+
return !operator==(Other);
364+
}
365+
};
366+
345367
// Holds allocation information in a space efficient format where frames are
346368
// represented using unique identifiers.
347369
struct IndexedAllocationInfo {
@@ -410,7 +432,7 @@ struct IndexedMemProfRecord {
410432
// list of inline locations in bottom-up order i.e. from leaf to root. The
411433
// inline location list may include additional entries, users should pick
412434
// the last entry in the list with the same function GUID.
413-
llvm::SmallVector<CallStackId> CallSiteIds;
435+
llvm::SmallVector<IndexedCallSiteInfo> CallSites;
414436

415437
void clear() { *this = IndexedMemProfRecord(); }
416438

@@ -427,7 +449,7 @@ struct IndexedMemProfRecord {
427449
if (Other.AllocSites != AllocSites)
428450
return false;
429451

430-
if (Other.CallSiteIds != CallSiteIds)
452+
if (Other.CallSites != CallSites)
431453
return false;
432454
return true;
433455
}
@@ -455,14 +477,37 @@ struct IndexedMemProfRecord {
455477
static GlobalValue::GUID getGUID(const StringRef FunctionName);
456478
};
457479

480+
// Holds call site information with frame contents inline.
481+
struct CallSiteInfo {
482+
// The frames in the call stack
483+
std::vector<Frame> Frames;
484+
485+
// The GUIDs of the callees at this call site
486+
SmallVector<GlobalValue::GUID, 1> CalleeGuids;
487+
488+
CallSiteInfo() = default;
489+
CallSiteInfo(std::vector<Frame> Frames) : Frames(std::move(Frames)) {}
490+
CallSiteInfo(std::vector<Frame> Frames,
491+
SmallVector<GlobalValue::GUID, 1> CalleeGuids)
492+
: Frames(std::move(Frames)), CalleeGuids(std::move(CalleeGuids)) {}
493+
494+
bool operator==(const CallSiteInfo &Other) const {
495+
return Frames == Other.Frames && CalleeGuids == Other.CalleeGuids;
496+
}
497+
498+
bool operator!=(const CallSiteInfo &Other) const {
499+
return !operator==(Other);
500+
}
501+
};
502+
458503
// Holds the memprof profile information for a function. The internal
459504
// representation stores frame contents inline. This representation should
460505
// be used for small amount of temporary, in memory instances.
461506
struct MemProfRecord {
462507
// Same as IndexedMemProfRecord::AllocSites with frame contents inline.
463508
llvm::SmallVector<AllocationInfo> AllocSites;
464509
// Same as IndexedMemProfRecord::CallSites with frame contents inline.
465-
llvm::SmallVector<std::vector<Frame>> CallSites;
510+
llvm::SmallVector<CallSiteInfo> CallSites;
466511

467512
MemProfRecord() = default;
468513

@@ -476,8 +521,8 @@ struct MemProfRecord {
476521

477522
if (!CallSites.empty()) {
478523
OS << " CallSites:\n";
479-
for (const std::vector<Frame> &Frames : CallSites) {
480-
for (const Frame &F : Frames) {
524+
for (const CallSiteInfo &CS : CallSites) {
525+
for (const Frame &F : CS.Frames) {
481526
OS << " -\n";
482527
F.printYAML(OS);
483528
}

llvm/include/llvm/ProfileData/MemProfYAML.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,14 @@ template <> struct MappingTraits<memprof::AllocationInfo> {
155155
// In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can
156156
// treat the GUID and the fields within MemProfRecord at the same level as if
157157
// the GUID were part of MemProfRecord.
158+
template <> struct MappingTraits<memprof::CallSiteInfo> {
159+
static void mapping(IO &Io, memprof::CallSiteInfo &CS) {
160+
Io.mapRequired("Frames", CS.Frames);
161+
// Keep this optional to make it easier to write tests.
162+
Io.mapOptional("CalleeGuids", CS.CalleeGuids);
163+
}
164+
};
165+
158166
template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
159167
static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) {
160168
Io.mapRequired("GUID", Pair.GUID);
@@ -174,6 +182,7 @@ template <> struct MappingTraits<memprof::AllMemProfData> {
174182
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame)
175183
LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>)
176184
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
185+
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
177186
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
178187

179188
#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_

llvm/lib/ProfileData/MemProf.cpp

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ static size_t serializedSizeV2(const IndexedMemProfRecord &Record,
6464
// The number of callsites we have information for.
6565
Result += sizeof(uint64_t);
6666
// The CallStackId
67-
Result += Record.CallSiteIds.size() * sizeof(CallStackId);
67+
Result += Record.CallSites.size() * sizeof(CallStackId);
6868
return Result;
6969
}
7070

@@ -78,7 +78,7 @@ static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
7878
// The number of callsites we have information for.
7979
Result += sizeof(uint64_t);
8080
// The linear call stack ID.
81-
Result += Record.CallSiteIds.size() * sizeof(LinearCallStackId);
81+
Result += Record.CallSites.size() * sizeof(LinearCallStackId);
8282
return Result;
8383
}
8484

@@ -106,9 +106,9 @@ static void serializeV2(const IndexedMemProfRecord &Record,
106106
}
107107

108108
// Related contexts.
109-
LE.write<uint64_t>(Record.CallSiteIds.size());
110-
for (const auto &CSId : Record.CallSiteIds)
111-
LE.write<CallStackId>(CSId);
109+
LE.write<uint64_t>(Record.CallSites.size());
110+
for (const auto &CS : Record.CallSites)
111+
LE.write<CallStackId>(CS.CSId);
112112
}
113113

114114
static void serializeV3(
@@ -127,10 +127,10 @@ static void serializeV3(
127127
}
128128

129129
// Related contexts.
130-
LE.write<uint64_t>(Record.CallSiteIds.size());
131-
for (const auto &CSId : Record.CallSiteIds) {
132-
assert(MemProfCallStackIndexes.contains(CSId));
133-
LE.write<LinearCallStackId>(MemProfCallStackIndexes[CSId]);
130+
LE.write<uint64_t>(Record.CallSites.size());
131+
for (const auto &CS : Record.CallSites) {
132+
assert(MemProfCallStackIndexes.contains(CS.CSId));
133+
LE.write<LinearCallStackId>(MemProfCallStackIndexes[CS.CSId]);
134134
}
135135
}
136136

@@ -170,11 +170,11 @@ static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
170170
// Read the callsite information.
171171
const uint64_t NumCtxs =
172172
endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
173-
Record.CallSiteIds.reserve(NumCtxs);
173+
Record.CallSites.reserve(NumCtxs);
174174
for (uint64_t J = 0; J < NumCtxs; J++) {
175175
CallStackId CSId =
176176
endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
177-
Record.CallSiteIds.push_back(CSId);
177+
Record.CallSites.emplace_back(CSId);
178178
}
179179

180180
return Record;
@@ -202,15 +202,15 @@ static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
202202
// Read the callsite information.
203203
const uint64_t NumCtxs =
204204
endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
205-
Record.CallSiteIds.reserve(NumCtxs);
205+
Record.CallSites.reserve(NumCtxs);
206206
for (uint64_t J = 0; J < NumCtxs; J++) {
207207
// We are storing LinearCallStackId in IndexedCallSiteInfo::CSId, which is a
208208
// CallStackId. Assert that CallStackId is no smaller than
209209
// LinearCallStackId.
210210
static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
211211
LinearCallStackId CSId =
212212
endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
213-
Record.CallSiteIds.push_back(CSId);
213+
Record.CallSites.emplace_back(CSId);
214214
}
215215

216216
return Record;
@@ -241,9 +241,11 @@ MemProfRecord IndexedMemProfRecord::toMemProfRecord(
241241
Record.AllocSites.push_back(std::move(AI));
242242
}
243243

244-
Record.CallSites.reserve(CallSiteIds.size());
245-
for (CallStackId CSId : CallSiteIds)
246-
Record.CallSites.push_back(Callback(CSId));
244+
Record.CallSites.reserve(CallSites.size());
245+
for (const IndexedCallSiteInfo &CS : CallSites) {
246+
std::vector<Frame> Frames = Callback(CS.CSId);
247+
Record.CallSites.emplace_back(std::move(Frames), CS.CalleeGuids);
248+
}
247249

248250
return Record;
249251
}

llvm/lib/ProfileData/MemProfReader.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ Error RawMemProfReader::mapRawProfileToRecords() {
521521
// we insert a new entry for callsite data if we need to.
522522
IndexedMemProfRecord &Record = MemProfData.Records[Id];
523523
for (LocationPtr Loc : Locs)
524-
Record.CallSiteIds.push_back(MemProfData.addCallStack(*Loc));
524+
Record.CallSites.emplace_back(MemProfData.addCallStack(*Loc));
525525
}
526526

527527
return Error::success();
@@ -808,10 +808,10 @@ void YAMLMemProfReader::parse(StringRef YAMLData) {
808808
IndexedRecord.AllocSites.emplace_back(CSId, AI.Info);
809809
}
810810

811-
// Populate CallSiteIds.
811+
// Populate CallSites with CalleeGuids.
812812
for (const auto &CallSite : Record.CallSites) {
813-
CallStackId CSId = AddCallStack(CallSite);
814-
IndexedRecord.CallSiteIds.push_back(CSId);
813+
CallStackId CSId = AddCallStack(CallSite.Frames);
814+
IndexedRecord.CallSites.emplace_back(CSId, CallSite.CalleeGuids);
815815
}
816816

817817
MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -954,7 +954,7 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
954954
UndriftCallStack(AS.CallStack);
955955

956956
for (auto &CS : MemProfRec.CallSites)
957-
UndriftCallStack(CS);
957+
UndriftCallStack(CS.Frames);
958958
}
959959

960960
static void
@@ -1048,15 +1048,16 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
10481048
// Need to record all frames from leaf up to and including this function,
10491049
// as any of these may or may not have been inlined at this point.
10501050
unsigned Idx = 0;
1051-
for (auto &StackFrame : CS) {
1051+
for (auto &StackFrame : CS.Frames) {
10521052
uint64_t StackId = computeStackId(StackFrame);
1053-
LocHashToCallSites[StackId].insert(ArrayRef<Frame>(CS).drop_front(Idx++));
1053+
LocHashToCallSites[StackId].insert(
1054+
ArrayRef<Frame>(CS.Frames).drop_front(Idx++));
10541055
ProfileHasColumns |= StackFrame.Column;
10551056
// Once we find this function, we can stop recording.
10561057
if (StackFrame.Function == FuncGUID)
10571058
break;
10581059
}
1059-
assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
1060+
assert(Idx <= CS.Frames.size() && CS.Frames[Idx - 1].Function == FuncGUID);
10601061
}
10611062

10621063
auto GetOffset = [](const DILocation *DIL) {

llvm/test/Transforms/PGOProfile/memprof-call-site-at-alloc-site.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ HeapProfileRecords:
2020
TotalLifetime: 1000000
2121
TotalLifetimeAccessDensity: 1
2222
CallSites:
23-
- - { Function: _Z3foov, LineOffset: 6, Column: 12, IsInlineFrame: false }
23+
- Frames:
24+
- { Function: _Z3foov, LineOffset: 6, Column: 12, IsInlineFrame: false }
2425
...
2526

2627
;--- memprof-call-site-at-alloc-site.ll

llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,19 @@ HeapProfileRecords:
4242
- GUID: main
4343
AllocSites: []
4444
CallSites:
45-
- - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
45+
- Frames:
46+
- { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
4647
- GUID: _ZL2f1v
4748
AllocSites: []
4849
CallSites:
49-
- - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
50+
- Frames:
51+
- { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
5052
- { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
5153
- GUID: _ZL2f2v
5254
AllocSites: []
5355
CallSites:
54-
- - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
56+
- Frames:
57+
- { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
5558
- { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
5659
- GUID: _Z2f3v
5760
AllocSites:

llvm/test/Transforms/PGOProfile/memprof-undrift.test

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,13 @@ HeapProfileRecords:
3535
- GUID: _Z3aaav
3636
AllocSites: []
3737
CallSites:
38-
- - { Function: _Z3aaav, LineOffset: 5, Column: 33, IsInlineFrame: false }
38+
- Frames:
39+
- { Function: _Z3aaav, LineOffset: 5, Column: 33, IsInlineFrame: false }
3940
- GUID: _Z6middlev
4041
AllocSites: []
4142
CallSites:
42-
- - { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false }
43+
- Frames:
44+
- { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false }
4345
- GUID: _Z3foov
4446
AllocSites:
4547
- Callstack:
@@ -77,7 +79,8 @@ HeapProfileRecords:
7779
- GUID: _Z3bbbv
7880
AllocSites: []
7981
CallSites:
80-
- - { Function: _Z3bbbv, LineOffset: 5, Column: 33, IsInlineFrame: false }
82+
- Frames:
83+
- { Function: _Z3bbbv, LineOffset: 5, Column: 33, IsInlineFrame: false }
8184
...
8285
;--- memprof_undrift.ll
8386
define dso_local ptr @_Z3foov() !dbg !5 {

llvm/test/tools/llvm-profdata/memprof-yaml.test

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1-
; REQUIRES: x86_64-linux
21
; RUN: split-file %s %t
32
; RUN: llvm-profdata merge %t/memprof-in.yaml -o %t/memprof-out.indexed
43
; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out.yaml
54
; RUN: cmp %t/memprof-in.yaml %t/memprof-out.yaml
65

6+
; This test is expected to fail until the profile format is updated to handle CalleeGuids.
7+
; XFAIL: *
8+
79
; Verify that the YAML output is identical to the YAML input.
810
;--- memprof-in.yaml
911
---
@@ -27,8 +29,12 @@ HeapProfileRecords:
2729
TotalLifetime: 777
2830
TotalLifetimeAccessDensity: 888
2931
CallSites:
30-
- - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
32+
- Frames:
33+
- { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
3134
- { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false }
32-
- - { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true }
35+
CalleeGuids: [0x100, 0x200]
36+
- Frames:
37+
- { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true }
3338
- { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false }
39+
CalleeGuids: [0x300]
3440
...

llvm/unittests/ProfileData/InstrProfTest.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames,
397397
for (const auto &CSId : AllocFrames)
398398
MR.AllocSites.emplace_back(CSId, Block, Schema);
399399
for (const auto &CSId : CallSiteFrames)
400-
MR.CallSiteIds.push_back(CSId);
400+
MR.CallSites.push_back(llvm::memprof::IndexedCallSiteInfo(CSId));
401401
return MR;
402402
}
403403

0 commit comments

Comments
 (0)