Skip to content

[BOLT] Build heatmap with pre-aggregated data #138798

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

14 changes: 0 additions & 14 deletions bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,6 @@ class DataAggregator : public DataReader {
uint64_t Addr;
};

/// Used for parsing specific pre-aggregated input files.
/// Holds one parsed record: its kind, the two locations it connects, and the
/// counters attached to it.
struct AggregatedLBREntry {
/// Kind of the record. parseAggregatedLBREntry() selects it from the type
/// field of the input line: "B" -> BRANCH, "F" -> FT,
/// "f" -> FT_EXTERNAL_ORIGIN, "T" -> TRACE.
enum Type : char { BRANCH = 0, FT, FT_EXTERNAL_ORIGIN, TRACE };
/// Location the record starts at.
Location From;
/// Location the record ends at.
Location To;
/// Frequency value read from the input line for this record.
uint64_t Count;
/// Misprediction counter; fall-through entries synthesized from a TRACE
/// record are created with 0 here.
uint64_t Mispreds;
/// Kind tag consulted when the record is processed.
Type EntryType;
};

struct Trace {
uint64_t From;
uint64_t To;
Expand Down Expand Up @@ -131,7 +121,6 @@ class DataAggregator : public DataReader {
/// and use them later for processing and assigning profile.
std::unordered_map<Trace, TakenBranchInfo, TraceHash> BranchLBRs;
std::unordered_map<Trace, FTInfo, TraceHash> FallthroughLBRs;
std::vector<AggregatedLBREntry> AggregatedLBRs;
std::unordered_map<uint64_t, uint64_t> BasicSamples;
std::vector<PerfMemSample> MemSamples;

Expand Down Expand Up @@ -423,9 +412,6 @@ class DataAggregator : public DataReader {
/// an external tool.
std::error_code parsePreAggregatedLBRSamples();

/// Process parsed pre-aggregated data.
void processPreAggregated();

/// If \p Address falls into the binary address space based on memory
/// mapping info \p MMI, then adjust it for further processing by subtracting
/// the base load address. External addresses, i.e. addresses that do not
Expand Down
134 changes: 59 additions & 75 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,19 +444,6 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
this->BC = &BC;

if (opts::ReadPreAggregated) {
parsePreAggregated();
return Error::success();
}

if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
processFileBuildID(*FileBuildID);
} else {
errs() << "BOLT-WARNING: build-id will not be checked because we could "
"not read one from input binary\n";
}

auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
exit(1);
Expand All @@ -469,6 +456,19 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
ErrorCallback(ReturnCode, ErrBuf);
};

if (opts::ReadPreAggregated) {
parsePreAggregated();
goto heatmap;
}

if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
processFileBuildID(*FileBuildID);
} else {
errs() << "BOLT-WARNING: build-id will not be checked because we could "
"not read one from input binary\n";
}

if (BC.IsLinuxKernel) {
// Current MMap parsing logic does not work with the Linux kernel.
// MMap entries for the Linux kernel use PERF_RECORD_MMAP
Expand Down Expand Up @@ -501,14 +501,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
(opts::BasicAggregation && parseBasicEvents()))
errs() << "PERF2BOLT: failed to parse samples\n";

if (opts::HeatmapMode) {
if (std::error_code EC = printLBRHeatMap()) {
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
exit(1);
}
exit(0);
}

// Special handling for memory events
if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
return Error::success();
Expand All @@ -519,6 +511,14 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {

deleteTempFiles();

heatmap:
if (opts::HeatmapMode) {
if (std::error_code EC = printLBRHeatMap()) {
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
exit(1);
}
exit(0);
}
return Error::success();
}

Expand Down Expand Up @@ -555,9 +555,7 @@ bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
}

void DataAggregator::processProfile(BinaryContext &BC) {
if (opts::ReadPreAggregated)
processPreAggregated();
else if (opts::BasicAggregation)
if (opts::BasicAggregation)
processBasicEvents();
else
processBranchEvents();
Expand Down Expand Up @@ -586,7 +584,6 @@ void DataAggregator::processProfile(BinaryContext &BC) {
// Release intermediate storage.
clear(BranchLBRs);
clear(FallthroughLBRs);
clear(AggregatedLBRs);
clear(BasicSamples);
clear(MemSamples);
}
Expand Down Expand Up @@ -1215,15 +1212,14 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
if (std::error_code EC = TypeOrErr.getError())
return EC;
auto Type = AggregatedLBREntry::TRACE;
if (LLVM_LIKELY(TypeOrErr.get() == "T")) {
} else if (TypeOrErr.get() == "B") {
Type = AggregatedLBREntry::BRANCH;
} else if (TypeOrErr.get() == "F") {
Type = AggregatedLBREntry::FT;
} else if (TypeOrErr.get() == "f") {
Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
} else {
enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
.Case("T", TRACE)
.Case("B", BRANCH)
.Case("F", FT)
.Case("f", FT_EXTERNAL_ORIGIN)
.Default(INVALID);
if (Type == INVALID) {
reportError("expected T, B, F or f");
return make_error_code(llvm::errc::io_error);
}
Expand Down Expand Up @@ -1279,13 +1275,28 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
BF->setHasProfileAvailable();

uint64_t Count = static_cast<uint64_t>(Frequency.get());
AggregatedLBREntry Entry{From.get(), To.get(), Count, Mispreds, Type};
AggregatedLBRs.emplace_back(Entry);
if (Type == AggregatedLBREntry::TRACE) {
auto FtType = (FromFunc == ToFunc) ? AggregatedLBREntry::FT
: AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
AggregatedLBREntry TraceFt{To.get(), TraceFtEnd.get(), Count, 0, FtType};
AggregatedLBRs.emplace_back(TraceFt);

Trace Trace(From->Offset, To->Offset);
// Taken trace
if (Type == TRACE || Type == BRANCH) {
TakenBranchInfo &Info = BranchLBRs[Trace];
Info.TakenCount += Count;
Info.MispredCount += Mispreds;

NumTotalSamples += Count;
}
// Construct fallthrough part of the trace
if (Type == TRACE) {
Trace.From = To->Offset;
Trace.To = TraceFtEnd->Offset;
Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
}
// Add fallthrough trace
if (Type != BRANCH) {
FTInfo &Info = FallthroughLBRs[Trace];
(Type == FT ? Info.InternCount : Info.ExternCount) += Count;

NumTraces += Count;
}

return std::error_code();
Expand Down Expand Up @@ -1567,7 +1578,6 @@ std::error_code DataAggregator::parseBranchEvents() {
printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
}
}
printBranchSamplesDiagnostics();

return std::error_code();
}
Expand Down Expand Up @@ -1595,6 +1605,7 @@ void DataAggregator::processBranchEvents() {
const TakenBranchInfo &Info = AggrLBR.second;
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
}
printBranchSamplesDiagnostics();
}

std::error_code DataAggregator::parseBasicEvents() {
Expand Down Expand Up @@ -1704,43 +1715,16 @@ std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
while (hasData())
size_t AggregatedLBRs = 0;
while (hasData()) {
if (std::error_code EC = parseAggregatedLBREntry())
return EC;

return std::error_code();
}

void DataAggregator::processPreAggregated() {
outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);

for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
switch (AggrEntry.EntryType) {
case AggregatedLBREntry::BRANCH:
case AggregatedLBREntry::TRACE:
doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
AggrEntry.Mispreds);
NumTotalSamples += AggrEntry.Count;
break;
case AggregatedLBREntry::FT:
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
? AggrEntry.From.Offset
: 0,
AggrEntry.From.Offset, false};
LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
doTrace(First, Second, AggrEntry.Count);
NumTraces += AggrEntry.Count;
break;
}
}
++AggregatedLBRs;
}

outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
<< " aggregated LBR entries\n";
printBranchSamplesDiagnostics();
outs() << "PERF2BOLT: read " << AggregatedLBRs << " aggregated LBR entries\n";

return std::error_code();
}

std::optional<int32_t> DataAggregator::parseCommExecEvent() {
Expand Down
33 changes: 33 additions & 0 deletions bolt/test/X86/heatmap-preagg.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
## Test heatmap with pre-aggregated profile

RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
## Non-BOLTed input binary
RUN: llvm-bolt-heatmap %t.exe -o %t --pa -p %p/Inputs/blarge_new.preagg.txt \
RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP %s
RUN: FileCheck %s --check-prefix CHECK-SEC-HOT --input-file %t-section-hotness.csv

## BOLTed input binary
RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \
RUN: --reorder-blocks=ext-tsp --split-functions --split-strategy=cdsplit \
RUN: --reorder-functions=cdsort --enable-bat --dyno-stats --skip-funcs=main
RUN: llvm-bolt-heatmap %t.out -o %t2 --pa -p %p/Inputs/blarge_new_bat.preagg.txt \
RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP-BAT %s
RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT --input-file %t2-section-hotness.csv

CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries
CHECK-HEATMAP: HEATMAP: invalid traces: 1

CHECK-SEC-HOT: .init, 0x401000, 0x40101b, 16.8545
CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583
CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872
CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000

CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries
CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2

CHECK-SEC-HOT-BAT: .init, 0x401000, 0x40101b, 17.2888
CHECK-SEC-HOT-BAT-NEXT: .plt, 0x401020, 0x4010b0, 5.6132
CHECK-SEC-HOT-BAT-NEXT: .bolt.org.text, 0x4010b0, 0x401c25, 38.3385
CHECK-SEC-HOT-BAT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000
CHECK-SEC-HOT-BAT-NEXT: .text, 0x800000, 0x8002cc, 38.7595
CHECK-SEC-HOT-BAT-NEXT: .text.cold, 0x800300, 0x800415, 0.0000
Loading