-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[BOLT] Build heatmap with pre-aggregated data #138798
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BOLT] Build heatmap with pre-aggregated data #138798
Conversation
Created using spr 1.3.4
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
@llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) Changes: Reuse data structures used by the perf data reader for pre-aggregated data. Test Plan: heatmap-preagg.test. Full diff: https://github.com/llvm/llvm-project/pull/138798.diff 3 Files Affected:
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index c4ee75e7a6da6..d66d198e37d61 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -92,16 +92,6 @@ class DataAggregator : public DataReader {
uint64_t Addr;
};
- /// Used for parsing specific pre-aggregated input files.
- struct AggregatedLBREntry {
- enum Type : char { BRANCH = 0, FT, FT_EXTERNAL_ORIGIN, TRACE };
- Location From;
- Location To;
- uint64_t Count;
- uint64_t Mispreds;
- Type EntryType;
- };
-
struct Trace {
uint64_t From;
uint64_t To;
@@ -131,7 +121,6 @@ class DataAggregator : public DataReader {
/// and use them later for processing and assigning profile.
std::unordered_map<Trace, TakenBranchInfo, TraceHash> BranchLBRs;
std::unordered_map<Trace, FTInfo, TraceHash> FallthroughLBRs;
- std::vector<AggregatedLBREntry> AggregatedLBRs;
std::unordered_map<uint64_t, uint64_t> BasicSamples;
std::vector<PerfMemSample> MemSamples;
@@ -416,14 +405,7 @@ class DataAggregator : public DataReader {
/// F 41be90 41be90 4
/// B 4b1942 39b57f0 3 0
/// B 4b196f 4b19e0 2 0
- void parsePreAggregated();
-
- /// Parse the full output of pre-aggregated LBR samples generated by
- /// an external tool.
- std::error_code parsePreAggregatedLBRSamples();
-
- /// Process parsed pre-aggregated data.
- void processPreAggregated();
+ std::error_code parsePreAggregated();
/// If \p Address falls into the binary address space based on memory
/// mapping info \p MMI, then adjust it for further processing by subtracting
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index aea6c67546ab1..a5ac87ee781b2 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -349,25 +349,29 @@ bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
return false;
}
-void DataAggregator::parsePreAggregated() {
- std::string Error;
+std::error_code DataAggregator::parsePreAggregated() {
+ outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
+ NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
+ TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(Filename);
- if (std::error_code EC = MB.getError()) {
- errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
- << EC.message() << "\n";
- exit(1);
- }
+ if (std::error_code EC = MB.getError())
+ return EC;
FileBuf = std::move(*MB);
ParsingBuf = FileBuf->getBuffer();
Col = 0;
Line = 1;
- if (parsePreAggregatedLBRSamples()) {
- errs() << "PERF2BOLT: failed to parse samples\n";
- exit(1);
+ size_t AggregatedLBRs = 0;
+ while (hasData()) {
+ if (std::error_code EC = parseAggregatedLBREntry())
+ return EC;
+ ++AggregatedLBRs;
}
+
+ outs() << "PERF2BOLT: read " << AggregatedLBRs << " aggregated LBR entries\n";
+ return std::error_code();
}
void DataAggregator::filterBinaryMMapInfo() {
@@ -446,11 +450,6 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
this->BC = &BC;
- if (opts::ReadPreAggregated) {
- parsePreAggregated();
- return Error::success();
- }
-
if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
processFileBuildID(*FileBuildID);
@@ -471,6 +470,12 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
ErrorCallback(ReturnCode, ErrBuf);
};
+ if (opts::ReadPreAggregated) {
+ if (std::error_code EC = parsePreAggregated())
+ return errorCodeToError(EC);
+ goto heatmap;
+ }
+
if (BC.IsLinuxKernel) {
// Current MMap parsing logic does not work with linux kernel.
// MMap entries for linux kernel uses PERF_RECORD_MMAP
@@ -502,12 +507,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
if (opts::BasicAggregation ? parseBasicEvents() : parseBranchEvents())
errs() << "PERF2BOLT: failed to parse samples\n";
- if (opts::HeatmapMode) {
- if (std::error_code EC = printLBRHeatMap())
- return errorCodeToError(EC);
- exit(0);
- }
-
// Special handling for memory events
if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
return Error::success();
@@ -518,6 +517,13 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
deleteTempFiles();
+heatmap:
+ if (opts::HeatmapMode) {
+ if (std::error_code EC = printLBRHeatMap())
+ return errorCodeToError(EC);
+ exit(0);
+ }
+
return Error::success();
}
@@ -554,9 +560,7 @@ bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
}
void DataAggregator::processProfile(BinaryContext &BC) {
- if (opts::ReadPreAggregated)
- processPreAggregated();
- else if (opts::BasicAggregation)
+ if (opts::BasicAggregation)
processBasicEvents();
else
processBranchEvents();
@@ -584,7 +588,6 @@ void DataAggregator::processProfile(BinaryContext &BC) {
// Release intermediate storage.
clear(BranchLBRs);
clear(FallthroughLBRs);
- clear(AggregatedLBRs);
clear(BasicSamples);
clear(MemSamples);
}
@@ -1213,15 +1216,14 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
if (std::error_code EC = TypeOrErr.getError())
return EC;
- auto Type = AggregatedLBREntry::TRACE;
- if (LLVM_LIKELY(TypeOrErr.get() == "T")) {
- } else if (TypeOrErr.get() == "B") {
- Type = AggregatedLBREntry::BRANCH;
- } else if (TypeOrErr.get() == "F") {
- Type = AggregatedLBREntry::FT;
- } else if (TypeOrErr.get() == "f") {
- Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
- } else {
+ enum TType { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
+ auto Type = StringSwitch<TType>(TypeOrErr.get())
+ .Case("T", TRACE)
+ .Case("B", BRANCH)
+ .Case("F", FT)
+ .Case("f", FT_EXTERNAL_ORIGIN)
+ .Default(INVALID);
+ if (Type == INVALID) {
reportError("expected T, B, F or f");
return make_error_code(llvm::errc::io_error);
}
@@ -1239,7 +1241,7 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
return EC;
ErrorOr<Location> TraceFtEnd = std::error_code();
- if (Type == AggregatedLBREntry::TRACE) {
+ if (Type == TRACE) {
while (checkAndConsumeFS()) {
}
TraceFtEnd = parseLocationOrOffset();
@@ -1249,13 +1251,12 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
while (checkAndConsumeFS()) {
}
- ErrorOr<int64_t> Frequency =
- parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
+ ErrorOr<int64_t> Frequency = parseNumberField(FieldSeparator, Type != BRANCH);
if (std::error_code EC = Frequency.getError())
return EC;
uint64_t Mispreds = 0;
- if (Type == AggregatedLBREntry::BRANCH) {
+ if (Type == BRANCH) {
while (checkAndConsumeFS()) {
}
ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
@@ -1277,13 +1278,28 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
BF->setHasProfileAvailable();
uint64_t Count = static_cast<uint64_t>(Frequency.get());
- AggregatedLBREntry Entry{From.get(), To.get(), Count, Mispreds, Type};
- AggregatedLBRs.emplace_back(Entry);
- if (Type == AggregatedLBREntry::TRACE) {
- auto FtType = (FromFunc == ToFunc) ? AggregatedLBREntry::FT
- : AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
- AggregatedLBREntry TraceFt{To.get(), TraceFtEnd.get(), Count, 0, FtType};
- AggregatedLBRs.emplace_back(TraceFt);
+
+ Trace Trace(From->Offset, To->Offset);
+ // Taken trace
+ if (Type == TRACE || Type == BRANCH) {
+ TakenBranchInfo &Info = BranchLBRs[Trace];
+ Info.TakenCount += Count;
+ Info.MispredCount += Mispreds;
+
+ NumTotalSamples += Count;
+ }
+ // Construct fallthrough part of the trace
+ if (Type == TRACE) {
+ Trace.From = To->Offset;
+ Trace.To = TraceFtEnd->Offset;
+ Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
+ }
+ // Add fallthrough trace
+ if (Type != BRANCH) {
+ FTInfo &Info = FallthroughLBRs[Trace];
+ (Type == FT ? Info.InternCount : Info.ExternCount) += Count;
+
+ NumTraces += Count;
}
return std::error_code();
@@ -1560,7 +1576,6 @@ std::error_code DataAggregator::parseBranchEvents() {
printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
}
}
- printBranchSamplesDiagnostics();
return std::error_code();
}
@@ -1588,6 +1603,7 @@ void DataAggregator::processBranchEvents() {
const TakenBranchInfo &Info = AggrLBR.second;
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
}
+ printBranchSamplesDiagnostics();
}
std::error_code DataAggregator::parseBasicEvents() {
@@ -1693,49 +1709,6 @@ void DataAggregator::processMemEvents() {
}
}
-std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
- outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
- NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
- TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
- while (hasData())
- if (std::error_code EC = parseAggregatedLBREntry())
- return EC;
-
- return std::error_code();
-}
-
-void DataAggregator::processPreAggregated() {
- outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
- NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
- TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
-
- for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
- switch (AggrEntry.EntryType) {
- case AggregatedLBREntry::BRANCH:
- case AggregatedLBREntry::TRACE:
- doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
- AggrEntry.Mispreds);
- NumTotalSamples += AggrEntry.Count;
- break;
- case AggregatedLBREntry::FT:
- case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
- LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
- ? AggrEntry.From.Offset
- : 0,
- AggrEntry.From.Offset, false};
- LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
- doTrace(First, Second, AggrEntry.Count);
- NumTraces += AggrEntry.Count;
- break;
- }
- }
- }
-
- outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
- << " aggregated LBR entries\n";
- printBranchSamplesDiagnostics();
-}
-
std::optional<int32_t> DataAggregator::parseCommExecEvent() {
size_t LineEnd = ParsingBuf.find_first_of("\n");
if (LineEnd == StringRef::npos) {
diff --git a/bolt/test/X86/heatmap-preagg.test b/bolt/test/X86/heatmap-preagg.test
new file mode 100644
index 0000000000000..00d4d521b1adf
--- /dev/null
+++ b/bolt/test/X86/heatmap-preagg.test
@@ -0,0 +1,33 @@
+## Test heatmap with pre-aggregated profile
+
+RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
+## Non-BOLTed input binary
+RUN: llvm-bolt-heatmap %t.exe -o %t --pa -p %p/Inputs/blarge_new.preagg.txt \
+RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP %s
+RUN: FileCheck %s --check-prefix CHECK-SEC-HOT --input-file %t-section-hotness.csv
+
+## BOLTed input binary
+RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \
+RUN: --reorder-blocks=ext-tsp --split-functions --split-strategy=cdsplit \
+RUN: --reorder-functions=cdsort --enable-bat --dyno-stats --skip-funcs=main
+RUN: llvm-bolt-heatmap %t.out -o %t2 --pa -p %p/Inputs/blarge_new_bat.preagg.txt \
+RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP-BAT %s
+RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT --input-file %t2-section-hotness.csv
+
+CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries
+CHECK-HEATMAP: HEATMAP: invalid traces: 1
+
+CHECK-SEC-HOT: .init, 0x401000, 0x40101b, 16.8545
+CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583
+CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872
+CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000
+
+CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries
+CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2
+
+CHECK-SEC-HOT-BAT: .init, 0x401000, 0x40101b, 17.2888
+CHECK-SEC-HOT-BAT-NEXT: .plt, 0x401020, 0x4010b0, 5.6132
+CHECK-SEC-HOT-BAT-NEXT: .bolt.org.text, 0x4010b0, 0x401c25, 38.3385
+CHECK-SEC-HOT-BAT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000
+CHECK-SEC-HOT-BAT-NEXT: .text, 0x800000, 0x8002cc, 38.7595
+CHECK-SEC-HOT-BAT-NEXT: .text.cold, 0x800300, 0x800415, 0.0000
|
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
Created using spr 1.3.4
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
Remove duplicate profile parsing in heatmap construction, switching to the already-parsed profile. #138798 adds support for using a pre-aggregated profile for heatmap construction. Test Plan: added heatmap.test in rafaelauler/bolt-tests@0868850
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Created using spr 1.3.4 [skip ci]
Reuse data structures used by the perf data reader for pre-aggregated data.
Combined with #136531, this allows using pre-aggregated data for the heatmap.
Test Plan: heatmap-preagg.test