Skip to content

Commit fbdb5ae

Browse files
authored
[BOLT] Build heatmap with pre-aggregated data (#138798)
Reuse the data structures used by the perf data reader for pre-aggregated data. Combined with #136531, this allows using pre-aggregated data for the heatmap. Test Plan: heatmap-preagg.test
1 parent 6f1f6d1 commit fbdb5ae

File tree

3 files changed

+92
-89
lines changed

3 files changed

+92
-89
lines changed

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -92,16 +92,6 @@ class DataAggregator : public DataReader {
9292
uint64_t Addr;
9393
};
9494

95-
/// Used for parsing specific pre-aggregated input files.
96-
struct AggregatedLBREntry {
97-
enum Type : char { BRANCH = 0, FT, FT_EXTERNAL_ORIGIN, TRACE };
98-
Location From;
99-
Location To;
100-
uint64_t Count;
101-
uint64_t Mispreds;
102-
Type EntryType;
103-
};
104-
10595
struct Trace {
10696
uint64_t From;
10797
uint64_t To;
@@ -131,7 +121,6 @@ class DataAggregator : public DataReader {
131121
/// and use them later for processing and assigning profile.
132122
std::unordered_map<Trace, TakenBranchInfo, TraceHash> BranchLBRs;
133123
std::unordered_map<Trace, FTInfo, TraceHash> FallthroughLBRs;
134-
std::vector<AggregatedLBREntry> AggregatedLBRs;
135124
std::unordered_map<uint64_t, uint64_t> BasicSamples;
136125
std::vector<PerfMemSample> MemSamples;
137126

@@ -423,9 +412,6 @@ class DataAggregator : public DataReader {
423412
/// an external tool.
424413
std::error_code parsePreAggregatedLBRSamples();
425414

426-
/// Process parsed pre-aggregated data.
427-
void processPreAggregated();
428-
429415
/// If \p Address falls into the binary address space based on memory
430416
/// mapping info \p MMI, then adjust it for further processing by subtracting
431417
/// the base load address. External addresses, i.e. addresses that do not

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 59 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -444,19 +444,6 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
444444
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
445445
this->BC = &BC;
446446

447-
if (opts::ReadPreAggregated) {
448-
parsePreAggregated();
449-
return Error::success();
450-
}
451-
452-
if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
453-
outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
454-
processFileBuildID(*FileBuildID);
455-
} else {
456-
errs() << "BOLT-WARNING: build-id will not be checked because we could "
457-
"not read one from input binary\n";
458-
}
459-
460447
auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
461448
errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
462449
exit(1);
@@ -469,6 +456,19 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
469456
ErrorCallback(ReturnCode, ErrBuf);
470457
};
471458

459+
if (opts::ReadPreAggregated) {
460+
parsePreAggregated();
461+
goto heatmap;
462+
}
463+
464+
if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
465+
outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
466+
processFileBuildID(*FileBuildID);
467+
} else {
468+
errs() << "BOLT-WARNING: build-id will not be checked because we could "
469+
"not read one from input binary\n";
470+
}
471+
472472
if (BC.IsLinuxKernel) {
473473
// Current MMap parsing logic does not work with linux kernel.
474474
// MMap entries for linux kernel uses PERF_RECORD_MMAP
@@ -501,14 +501,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
501501
(opts::BasicAggregation && parseBasicEvents()))
502502
errs() << "PERF2BOLT: failed to parse samples\n";
503503

504-
if (opts::HeatmapMode) {
505-
if (std::error_code EC = printLBRHeatMap()) {
506-
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
507-
exit(1);
508-
}
509-
exit(0);
510-
}
511-
512504
// Special handling for memory events
513505
if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
514506
return Error::success();
@@ -519,6 +511,14 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
519511

520512
deleteTempFiles();
521513

514+
heatmap:
515+
if (opts::HeatmapMode) {
516+
if (std::error_code EC = printLBRHeatMap()) {
517+
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
518+
exit(1);
519+
}
520+
exit(0);
521+
}
522522
return Error::success();
523523
}
524524

@@ -555,9 +555,7 @@ bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
555555
}
556556

557557
void DataAggregator::processProfile(BinaryContext &BC) {
558-
if (opts::ReadPreAggregated)
559-
processPreAggregated();
560-
else if (opts::BasicAggregation)
558+
if (opts::BasicAggregation)
561559
processBasicEvents();
562560
else
563561
processBranchEvents();
@@ -586,7 +584,6 @@ void DataAggregator::processProfile(BinaryContext &BC) {
586584
// Release intermediate storage.
587585
clear(BranchLBRs);
588586
clear(FallthroughLBRs);
589-
clear(AggregatedLBRs);
590587
clear(BasicSamples);
591588
clear(MemSamples);
592589
}
@@ -1215,15 +1212,14 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
12151212
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
12161213
if (std::error_code EC = TypeOrErr.getError())
12171214
return EC;
1218-
auto Type = AggregatedLBREntry::TRACE;
1219-
if (LLVM_LIKELY(TypeOrErr.get() == "T")) {
1220-
} else if (TypeOrErr.get() == "B") {
1221-
Type = AggregatedLBREntry::BRANCH;
1222-
} else if (TypeOrErr.get() == "F") {
1223-
Type = AggregatedLBREntry::FT;
1224-
} else if (TypeOrErr.get() == "f") {
1225-
Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1226-
} else {
1215+
enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
1216+
auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
1217+
.Case("T", TRACE)
1218+
.Case("B", BRANCH)
1219+
.Case("F", FT)
1220+
.Case("f", FT_EXTERNAL_ORIGIN)
1221+
.Default(INVALID);
1222+
if (Type == INVALID) {
12271223
reportError("expected T, B, F or f");
12281224
return make_error_code(llvm::errc::io_error);
12291225
}
@@ -1279,13 +1275,28 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
12791275
BF->setHasProfileAvailable();
12801276

12811277
uint64_t Count = static_cast<uint64_t>(Frequency.get());
1282-
AggregatedLBREntry Entry{From.get(), To.get(), Count, Mispreds, Type};
1283-
AggregatedLBRs.emplace_back(Entry);
1284-
if (Type == AggregatedLBREntry::TRACE) {
1285-
auto FtType = (FromFunc == ToFunc) ? AggregatedLBREntry::FT
1286-
: AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1287-
AggregatedLBREntry TraceFt{To.get(), TraceFtEnd.get(), Count, 0, FtType};
1288-
AggregatedLBRs.emplace_back(TraceFt);
1278+
1279+
Trace Trace(From->Offset, To->Offset);
1280+
// Taken trace
1281+
if (Type == TRACE || Type == BRANCH) {
1282+
TakenBranchInfo &Info = BranchLBRs[Trace];
1283+
Info.TakenCount += Count;
1284+
Info.MispredCount += Mispreds;
1285+
1286+
NumTotalSamples += Count;
1287+
}
1288+
// Construct fallthrough part of the trace
1289+
if (Type == TRACE) {
1290+
Trace.From = To->Offset;
1291+
Trace.To = TraceFtEnd->Offset;
1292+
Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
1293+
}
1294+
// Add fallthrough trace
1295+
if (Type != BRANCH) {
1296+
FTInfo &Info = FallthroughLBRs[Trace];
1297+
(Type == FT ? Info.InternCount : Info.ExternCount) += Count;
1298+
1299+
NumTraces += Count;
12891300
}
12901301

12911302
return std::error_code();
@@ -1567,7 +1578,6 @@ std::error_code DataAggregator::parseBranchEvents() {
15671578
printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
15681579
}
15691580
}
1570-
printBranchSamplesDiagnostics();
15711581

15721582
return std::error_code();
15731583
}
@@ -1595,6 +1605,7 @@ void DataAggregator::processBranchEvents() {
15951605
const TakenBranchInfo &Info = AggrLBR.second;
15961606
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
15971607
}
1608+
printBranchSamplesDiagnostics();
15981609
}
15991610

16001611
std::error_code DataAggregator::parseBasicEvents() {
@@ -1704,43 +1715,16 @@ std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
17041715
outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
17051716
NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
17061717
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1707-
while (hasData())
1718+
size_t AggregatedLBRs = 0;
1719+
while (hasData()) {
17081720
if (std::error_code EC = parseAggregatedLBREntry())
17091721
return EC;
1710-
1711-
return std::error_code();
1712-
}
1713-
1714-
void DataAggregator::processPreAggregated() {
1715-
outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1716-
NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1717-
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1718-
1719-
for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1720-
switch (AggrEntry.EntryType) {
1721-
case AggregatedLBREntry::BRANCH:
1722-
case AggregatedLBREntry::TRACE:
1723-
doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1724-
AggrEntry.Mispreds);
1725-
NumTotalSamples += AggrEntry.Count;
1726-
break;
1727-
case AggregatedLBREntry::FT:
1728-
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1729-
LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1730-
? AggrEntry.From.Offset
1731-
: 0,
1732-
AggrEntry.From.Offset, false};
1733-
LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1734-
doTrace(First, Second, AggrEntry.Count);
1735-
NumTraces += AggrEntry.Count;
1736-
break;
1737-
}
1738-
}
1722+
++AggregatedLBRs;
17391723
}
17401724

1741-
outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1742-
<< " aggregated LBR entries\n";
1743-
printBranchSamplesDiagnostics();
1725+
outs() << "PERF2BOLT: read " << AggregatedLBRs << " aggregated LBR entries\n";
1726+
1727+
return std::error_code();
17441728
}
17451729

17461730
std::optional<int32_t> DataAggregator::parseCommExecEvent() {

bolt/test/X86/heatmap-preagg.test

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
## Test heatmap with pre-aggregated profile
2+
3+
RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
4+
## Non-BOLTed input binary
5+
RUN: llvm-bolt-heatmap %t.exe -o %t --pa -p %p/Inputs/blarge_new.preagg.txt \
6+
RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP %s
7+
RUN: FileCheck %s --check-prefix CHECK-SEC-HOT --input-file %t-section-hotness.csv
8+
9+
## BOLTed input binary
10+
RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \
11+
RUN: --reorder-blocks=ext-tsp --split-functions --split-strategy=cdsplit \
12+
RUN: --reorder-functions=cdsort --enable-bat --dyno-stats --skip-funcs=main
13+
RUN: llvm-bolt-heatmap %t.out -o %t2 --pa -p %p/Inputs/blarge_new_bat.preagg.txt \
14+
RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP-BAT %s
15+
RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT --input-file %t2-section-hotness.csv
16+
17+
CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries
18+
CHECK-HEATMAP: HEATMAP: invalid traces: 1
19+
20+
CHECK-SEC-HOT: .init, 0x401000, 0x40101b, 16.8545
21+
CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583
22+
CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872
23+
CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000
24+
25+
CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries
26+
CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2
27+
28+
CHECK-SEC-HOT-BAT: .init, 0x401000, 0x40101b, 17.2888
29+
CHECK-SEC-HOT-BAT-NEXT: .plt, 0x401020, 0x4010b0, 5.6132
30+
CHECK-SEC-HOT-BAT-NEXT: .bolt.org.text, 0x4010b0, 0x401c25, 38.3385
31+
CHECK-SEC-HOT-BAT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000
32+
CHECK-SEC-HOT-BAT-NEXT: .text, 0x800000, 0x8002cc, 38.7595
33+
CHECK-SEC-HOT-BAT-NEXT: .text.cold, 0x800300, 0x800415, 0.0000

0 commit comments

Comments
 (0)