@@ -149,11 +149,33 @@ static cl::opt<bool> ClMemProfMatchHotColdNew(
149
149
" Match allocation profiles onto existing hot/cold operator new calls" ),
150
150
cl::Hidden, cl::init(false ));
151
151
152
+ static cl::opt<bool >
153
+ ClPrintMemProfMatchInfo (" memprof-print-match-info" ,
154
+ cl::desc (" Print matching stats for each allocation "
155
+ " context in this module's profiles" ),
156
+ cl::Hidden, cl::init(false ));
157
+
158
+ // Instrumentation statistics
152
159
STATISTIC (NumInstrumentedReads, " Number of instrumented reads" );
153
160
STATISTIC (NumInstrumentedWrites, " Number of instrumented writes" );
154
161
STATISTIC (NumSkippedStackReads, " Number of non-instrumented stack reads" );
155
162
STATISTIC (NumSkippedStackWrites, " Number of non-instrumented stack writes" );
163
+
164
+ // Matching statistics
156
165
STATISTIC (NumOfMemProfMissing, " Number of functions without memory profile." );
166
+ STATISTIC (NumOfMemProfMismatch,
167
+ " Number of functions having mismatched memory profile hash." );
168
+ STATISTIC (NumOfMemProfFunc, " Number of functions having valid memory profile." );
169
+ STATISTIC (NumOfMemProfAllocContextProfiles,
170
+ " Number of alloc contexts in memory profile." );
171
+ STATISTIC (NumOfMemProfCallSiteProfiles,
172
+ " Number of callsites in memory profile." );
173
+ STATISTIC (NumOfMemProfMatchedAllocContexts,
174
+ " Number of matched memory profile alloc contexts." );
175
+ STATISTIC (NumOfMemProfMatchedAllocs,
176
+ " Number of matched memory profile allocs." );
177
+ STATISTIC (NumOfMemProfMatchedCallSites,
178
+ " Number of matched memory profile callsites." );
157
179
158
180
namespace {
159
181
@@ -637,15 +659,30 @@ static uint64_t computeStackId(const memprof::Frame &Frame) {
637
659
return computeStackId (Frame.Function , Frame.LineOffset , Frame.Column );
638
660
}
639
661
640
- static void addCallStack (CallStackTrie &AllocTrie,
641
- const AllocationInfo *AllocInfo) {
662
+ // Helper to generate a single hash id for a given callstack, used for emitting
663
+ // matching statistics and useful for uniquing such statistics across modules.
664
+ static uint64_t
665
+ computeFullStackId (const SmallVectorImpl<memprof::Frame> &CallStack) {
666
+ llvm::HashBuilder<llvm::TruncatedBLAKE3<8 >, llvm::endianness::little>
667
+ HashBuilder;
668
+ for (auto &F : CallStack)
669
+ HashBuilder.add (F.Function , F.LineOffset , F.Column );
670
+ llvm::BLAKE3Result<8 > Hash = HashBuilder.final ();
671
+ uint64_t Id;
672
+ std::memcpy (&Id, Hash.data (), sizeof (Hash));
673
+ return Id;
674
+ }
675
+
676
+ static AllocationType addCallStack (CallStackTrie &AllocTrie,
677
+ const AllocationInfo *AllocInfo) {
642
678
SmallVector<uint64_t > StackIds;
643
679
for (const auto &StackFrame : AllocInfo->CallStack )
644
680
StackIds.push_back (computeStackId (StackFrame));
645
681
auto AllocType = getAllocType (AllocInfo->Info .getTotalLifetimeAccessDensity (),
646
682
AllocInfo->Info .getAllocCount (),
647
683
AllocInfo->Info .getTotalLifetime ());
648
684
AllocTrie.addCallStack (AllocType, StackIds);
685
+ return AllocType;
649
686
}
650
687
651
688
// Helper to compare the InlinedCallStack computed from an instruction's debug
@@ -701,9 +738,16 @@ static bool isNewWithHotColdVariant(Function *Callee,
701
738
}
702
739
}
703
740
704
- static void readMemprof (Module &M, Function &F,
705
- IndexedInstrProfReader *MemProfReader,
706
- const TargetLibraryInfo &TLI) {
741
+ struct AllocMatchInfo {
742
+ uint64_t TotalSize = 0 ;
743
+ AllocationType AllocType = AllocationType::None;
744
+ bool Matched = false ;
745
+ };
746
+
747
+ static void
748
+ readMemprof (Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
749
+ const TargetLibraryInfo &TLI,
750
+ std::map<uint64_t , AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
707
751
auto &Ctx = M.getContext ();
708
752
// Previously we used getIRPGOFuncName() here. If F is local linkage,
709
753
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -727,6 +771,7 @@ static void readMemprof(Module &M, Function &F,
727
771
SkipWarning = !PGOWarnMissing;
728
772
LLVM_DEBUG (dbgs () << " unknown function" );
729
773
} else if (Err == instrprof_error::hash_mismatch) {
774
+ NumOfMemProfMismatch++;
730
775
SkipWarning =
731
776
NoPGOWarnMismatch ||
732
777
(NoPGOWarnMismatchComdatWeak &&
@@ -748,6 +793,8 @@ static void readMemprof(Module &M, Function &F,
748
793
return ;
749
794
}
750
795
796
+ NumOfMemProfFunc++;
797
+
751
798
// Detect if there are non-zero column numbers in the profile. If not,
752
799
// treat all column numbers as 0 when matching (i.e. ignore any non-zero
753
800
// columns in the IR). The profiled binary might have been built with
@@ -762,6 +809,7 @@ static void readMemprof(Module &M, Function &F,
762
809
std::map<uint64_t , std::set<std::pair<const std::vector<Frame> *, unsigned >>>
763
810
LocHashToCallSites;
764
811
for (auto &AI : MemProfRec->AllocSites ) {
812
+ NumOfMemProfAllocContextProfiles++;
765
813
// Associate the allocation info with the leaf frame. The later matching
766
814
// code will match any inlined call sequences in the IR with a longer prefix
767
815
// of call stack frames.
@@ -770,6 +818,7 @@ static void readMemprof(Module &M, Function &F,
770
818
ProfileHasColumns |= AI.CallStack [0 ].Column ;
771
819
}
772
820
for (auto &CS : MemProfRec->CallSites ) {
821
+ NumOfMemProfCallSiteProfiles++;
773
822
// Need to record all frames from leaf up to and including this function,
774
823
// as any of these may or may not have been inlined at this point.
775
824
unsigned Idx = 0 ;
@@ -863,13 +912,23 @@ static void readMemprof(Module &M, Function &F,
863
912
// If we found and thus matched all frames on the call, include
864
913
// this MIB.
865
914
if (stackFrameIncludesInlinedCallStack (AllocInfo->CallStack ,
866
- InlinedCallStack))
867
- addCallStack (AllocTrie, AllocInfo);
915
+ InlinedCallStack)) {
916
+ NumOfMemProfMatchedAllocContexts++;
917
+ auto AllocType = addCallStack (AllocTrie, AllocInfo);
918
+ // Record information about the allocation if match info printing
919
+ // was requested.
920
+ if (ClPrintMemProfMatchInfo) {
921
+ auto FullStackId = computeFullStackId (AllocInfo->CallStack );
922
+ FullStackIdToAllocMatchInfo[FullStackId] = {
923
+ AllocInfo->Info .getTotalSize (), AllocType, /* Matched=*/ true };
924
+ }
925
+ }
868
926
}
869
927
// We might not have matched any to the full inlined call stack.
870
928
// But if we did, create and attach metadata, or a function attribute if
871
929
// all contexts have identical profiled behavior.
872
930
if (!AllocTrie.empty ()) {
931
+ NumOfMemProfMatchedAllocs++;
873
932
// MemprofMDAttached will be false if a function attribute was
874
933
// attached.
875
934
bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata (CI);
@@ -897,6 +956,7 @@ static void readMemprof(Module &M, Function &F,
897
956
// attach call stack metadata.
898
957
if (stackFrameIncludesInlinedCallStack (
899
958
*CallStackIdx.first , InlinedCallStack, CallStackIdx.second )) {
959
+ NumOfMemProfMatchedCallSites++;
900
960
addCallsiteMetadata (I, InlinedCallStack, Ctx);
901
961
// Only need to find one with a matching call stack and add a single
902
962
// callsite metadata.
@@ -942,12 +1002,25 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
942
1002
943
1003
auto &FAM = AM.getResult <FunctionAnalysisManagerModuleProxy>(M).getManager ();
944
1004
1005
+ // Map from the stack hash of each allocation context in the function profiles
1006
+ // to the total profiled size (bytes), allocation type, and whether we matched
1007
+ // it to an allocation in the IR.
1008
+ std::map<uint64_t , AllocMatchInfo> FullStackIdToAllocMatchInfo;
1009
+
945
1010
for (auto &F : M) {
946
1011
if (F.isDeclaration ())
947
1012
continue ;
948
1013
949
1014
const TargetLibraryInfo &TLI = FAM.getResult <TargetLibraryAnalysis>(F);
950
- readMemprof (M, F, MemProfReader.get (), TLI);
1015
+ readMemprof (M, F, MemProfReader.get (), TLI, FullStackIdToAllocMatchInfo);
1016
+ }
1017
+
1018
+ if (ClPrintMemProfMatchInfo) {
1019
+ for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
1020
+ errs () << " MemProf " << getAllocTypeAttributeString (Info.AllocType )
1021
+ << " context with id " << Id << " has total profiled size "
1022
+ << Info.TotalSize << (Info.Matched ? " is" : " not" )
1023
+ << " matched\n " ;
951
1024
}
952
1025
953
1026
return PreservedAnalyses::none ();
0 commit comments