Skip to content

Commit 7536474

Browse files
[MemProf] Add matching statistics and tracing (#94814)
To help debug or surface matching issues, add more statistics to the matching. Also add optional emission of each context seen in the function profiles along with its allocation type, size in bytes, and whether it was matched. This information is emitted along with a hash of the full stack context, to allow deduplication across modules for allocations within header files.
1 parent 6b4c122 commit 7536474

File tree

2 files changed

+101
-9
lines changed

2 files changed

+101
-9
lines changed

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 81 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -149,11 +149,33 @@ static cl::opt<bool> ClMemProfMatchHotColdNew(
149149
"Match allocation profiles onto existing hot/cold operator new calls"),
150150
cl::Hidden, cl::init(false));
151151

152+
static cl::opt<bool>
153+
ClPrintMemProfMatchInfo("memprof-print-match-info",
154+
cl::desc("Print matching stats for each allocation "
155+
"context in this module's profiles"),
156+
cl::Hidden, cl::init(false));
157+
158+
// Instrumentation statistics
152159
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
153160
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
154161
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
155162
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
163+
164+
// Matching statistics
156165
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
166+
STATISTIC(NumOfMemProfMismatch,
167+
"Number of functions having mismatched memory profile hash.");
168+
STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
169+
STATISTIC(NumOfMemProfAllocContextProfiles,
170+
"Number of alloc contexts in memory profile.");
171+
STATISTIC(NumOfMemProfCallSiteProfiles,
172+
"Number of callsites in memory profile.");
173+
STATISTIC(NumOfMemProfMatchedAllocContexts,
174+
"Number of matched memory profile alloc contexts.");
175+
STATISTIC(NumOfMemProfMatchedAllocs,
176+
"Number of matched memory profile allocs.");
177+
STATISTIC(NumOfMemProfMatchedCallSites,
178+
"Number of matched memory profile callsites.");
157179

158180
namespace {
159181

@@ -637,15 +659,30 @@ static uint64_t computeStackId(const memprof::Frame &Frame) {
637659
return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
638660
}
639661

640-
static void addCallStack(CallStackTrie &AllocTrie,
641-
const AllocationInfo *AllocInfo) {
662+
// Helper to generate a single hash id for a given callstack, used for emitting
663+
// matching statistics and useful for uniquing such statistics across modules.
664+
static uint64_t
665+
computeFullStackId(const SmallVectorImpl<memprof::Frame> &CallStack) {
666+
llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
667+
HashBuilder;
668+
for (auto &F : CallStack)
669+
HashBuilder.add(F.Function, F.LineOffset, F.Column);
670+
llvm::BLAKE3Result<8> Hash = HashBuilder.final();
671+
uint64_t Id;
672+
std::memcpy(&Id, Hash.data(), sizeof(Hash));
673+
return Id;
674+
}
675+
676+
// Adds the profiled allocation context described by AllocInfo to AllocTrie.
//
// A stack id is computed for each frame of AllocInfo->CallStack (in call stack
// order), the allocation type is derived from the profiled total lifetime
// access density, allocation count and total lifetime, and both are handed to
// the trie. The allocation type that was used is returned to the caller.
static AllocationType addCallStack(CallStackTrie &AllocTrie,
                                   const AllocationInfo *AllocInfo) {
  SmallVector<uint64_t> FrameIds;
  for (const auto &F : AllocInfo->CallStack) {
    const uint64_t FrameId = computeStackId(F);
    FrameIds.push_back(FrameId);
  }

  const auto &Info = AllocInfo->Info;
  const auto Type =
      getAllocType(Info.getTotalLifetimeAccessDensity(), Info.getAllocCount(),
                   Info.getTotalLifetime());

  AllocTrie.addCallStack(Type, FrameIds);
  return Type;
}
650687

651688
// Helper to compare the InlinedCallStack computed from an instruction's debug
@@ -701,9 +738,16 @@ static bool isNewWithHotColdVariant(Function *Callee,
701738
}
702739
}
703740

704-
static void readMemprof(Module &M, Function &F,
705-
IndexedInstrProfReader *MemProfReader,
706-
const TargetLibraryInfo &TLI) {
741+
struct AllocMatchInfo {
742+
uint64_t TotalSize = 0;
743+
AllocationType AllocType = AllocationType::None;
744+
bool Matched = false;
745+
};
746+
747+
static void
748+
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
749+
const TargetLibraryInfo &TLI,
750+
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
707751
auto &Ctx = M.getContext();
708752
// Previously we used getIRPGOFuncName() here. If F is local linkage,
709753
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -727,6 +771,7 @@ static void readMemprof(Module &M, Function &F,
727771
SkipWarning = !PGOWarnMissing;
728772
LLVM_DEBUG(dbgs() << "unknown function");
729773
} else if (Err == instrprof_error::hash_mismatch) {
774+
NumOfMemProfMismatch++;
730775
SkipWarning =
731776
NoPGOWarnMismatch ||
732777
(NoPGOWarnMismatchComdatWeak &&
@@ -748,6 +793,8 @@ static void readMemprof(Module &M, Function &F,
748793
return;
749794
}
750795

796+
NumOfMemProfFunc++;
797+
751798
// Detect if there are non-zero column numbers in the profile. If not,
752799
// treat all column numbers as 0 when matching (i.e. ignore any non-zero
753800
// columns in the IR). The profiled binary might have been built with
@@ -762,6 +809,7 @@ static void readMemprof(Module &M, Function &F,
762809
std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *, unsigned>>>
763810
LocHashToCallSites;
764811
for (auto &AI : MemProfRec->AllocSites) {
812+
NumOfMemProfAllocContextProfiles++;
765813
// Associate the allocation info with the leaf frame. The later matching
766814
// code will match any inlined call sequences in the IR with a longer prefix
767815
// of call stack frames.
@@ -770,6 +818,7 @@ static void readMemprof(Module &M, Function &F,
770818
ProfileHasColumns |= AI.CallStack[0].Column;
771819
}
772820
for (auto &CS : MemProfRec->CallSites) {
821+
NumOfMemProfCallSiteProfiles++;
773822
// Need to record all frames from leaf up to and including this function,
774823
// as any of these may or may not have been inlined at this point.
775824
unsigned Idx = 0;
@@ -863,13 +912,23 @@ static void readMemprof(Module &M, Function &F,
863912
// If we found and thus matched all frames on the call, include
864913
// this MIB.
865914
if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
866-
InlinedCallStack))
867-
addCallStack(AllocTrie, AllocInfo);
915+
InlinedCallStack)) {
916+
NumOfMemProfMatchedAllocContexts++;
917+
auto AllocType = addCallStack(AllocTrie, AllocInfo);
918+
// Record information about the allocation if match info printing
919+
// was requested.
920+
if (ClPrintMemProfMatchInfo) {
921+
auto FullStackId = computeFullStackId(AllocInfo->CallStack);
922+
FullStackIdToAllocMatchInfo[FullStackId] = {
923+
AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
924+
}
925+
}
868926
}
869927
// We might not have matched any to the full inlined call stack.
870928
// But if we did, create and attach metadata, or a function attribute if
871929
// all contexts have identical profiled behavior.
872930
if (!AllocTrie.empty()) {
931+
NumOfMemProfMatchedAllocs++;
873932
// MemprofMDAttached will be false if a function attribute was
874933
// attached.
875934
bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
@@ -897,6 +956,7 @@ static void readMemprof(Module &M, Function &F,
897956
// attach call stack metadata.
898957
if (stackFrameIncludesInlinedCallStack(
899958
*CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
959+
NumOfMemProfMatchedCallSites++;
900960
addCallsiteMetadata(I, InlinedCallStack, Ctx);
901961
// Only need to find one with a matching call stack and add a single
902962
// callsite metadata.
@@ -942,12 +1002,25 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
9421002

9431003
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
9441004

1005+
// Map from the stack hash of each allocation context in the function profiles
1006+
// to the total profiled size (bytes), allocation type, and whether we matched
1007+
// it to an allocation in the IR.
1008+
std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
1009+
9451010
for (auto &F : M) {
9461011
if (F.isDeclaration())
9471012
continue;
9481013

9491014
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
950-
readMemprof(M, F, MemProfReader.get(), TLI);
1015+
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo);
1016+
}
1017+
1018+
if (ClPrintMemProfMatchInfo) {
1019+
for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
1020+
errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
1021+
<< " context with id " << Id << " has total profiled size "
1022+
<< Info.TotalSize << (Info.Matched ? " is" : " not")
1023+
<< " matched\n";
9511024
}
9521025

9531026
return PreservedAnalyses::none();

llvm/test/Transforms/PGOProfile/memprof.ll

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
; REQUIRES: zlib
66
;; Avoid failures on big-endian systems that can't read the profile properly
77
; REQUIRES: x86_64-linux
8+
;; -stats requires asserts
9+
; REQUIRES: asserts
810

911
;; TODO: Use text profile inputs once that is available for memprof.
1012
;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh.
@@ -25,7 +27,7 @@
2527
; ALL-NOT: no profile data available for function
2628

2729
;; Using a memprof-only profile for memprof-use should only give memprof metadata
28-
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
30+
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-print-match-info -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY,MEMPROFMATCHINFO,MEMPROFSTATS
2931
; There should not be any PGO metadata
3032
; MEMPROFONLY-NOT: !prof
3133

@@ -61,6 +63,15 @@
6163
;; give both memprof and pgo metadata.
6264
; RUN: opt < %s -passes='pgo-instr-use,memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO
6365

66+
; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched
67+
; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched
68+
; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched
69+
; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched
70+
; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched
71+
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched
72+
; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched
73+
; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched
74+
6475
; ModuleID = 'memprof.cc'
6576
source_filename = "memprof.cc"
6677
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -346,6 +357,14 @@ for.end: ; preds = %for.cond
346357
; MEMPROFNOCOLINFO: ![[C10]] = !{i64 -4535090212904553409}
347358
; MEMPROFNOCOLINFO: ![[C11]] = !{i64 3577763375057267810}
348359

360+
; MEMPROFSTATS: 8 memprof - Number of alloc contexts in memory profile.
361+
; MEMPROFSTATS: 10 memprof - Number of callsites in memory profile.
362+
; MEMPROFSTATS: 6 memprof - Number of functions having valid memory profile.
363+
; MEMPROFSTATS: 8 memprof - Number of matched memory profile alloc contexts.
364+
; MEMPROFSTATS: 3 memprof - Number of matched memory profile allocs.
365+
; MEMPROFSTATS: 10 memprof - Number of matched memory profile callsites.
366+
367+
349368
; Function Attrs: argmemonly nofree nounwind willreturn writeonly
350369
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
351370

0 commit comments

Comments
 (0)