Skip to content

Commit 510229e

Browse files
SC llvm team authored and SC llvm team committed
Merged main:9223ccb0e56d6d4de17808e2e4000c8019a9a218 into amd-gfx:3189509eeb79
Local branch amd-gfx 3189509: Merged main:7c5c8b2f479fbed6afcd4072bdef76ea867577de into amd-gfx:e7c692dd6c2c
Remote branch main 9223ccb: Avoid std::string -> (char *) roundtrip in createStringError() (NFC) (llvm#93242)
2 parents: 3189509 + 9223ccb; commit 510229e

File tree

668 files changed

+23375
-9954
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

668 files changed

+23375
-9954
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
#include "bolt/Core/BinaryData.h"
1818
#include "bolt/Core/BinarySection.h"
1919
#include "bolt/Core/DebugData.h"
20+
#include "bolt/Core/DynoStats.h"
2021
#include "bolt/Core/JumpTable.h"
2122
#include "bolt/Core/MCPlusBuilder.h"
2223
#include "bolt/RuntimeLibs/RuntimeLibrary.h"
@@ -359,7 +360,7 @@ class BinaryContext {
359360
void setFileBuildID(StringRef ID) { FileBuildID = std::string(ID); }
360361

361362
bool hasSymbolsWithFileName() const { return HasSymbolsWithFileName; }
362-
void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = true; }
363+
void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = Value; }
363364

364365
/// Return true if relocations against symbol with a given name
365366
/// must be created.
@@ -717,6 +718,9 @@ class BinaryContext {
717718
uint64_t NumStaleBlocksWithEqualIcount{0};
718719
} Stats;
719720

721+
// Original binary execution count stats.
722+
DynoStats InitialDynoStats;
723+
720724
// Address of the first allocated segment.
721725
uint64_t FirstAllocAddress{std::numeric_limits<uint64_t>::max()};
722726

bolt/include/bolt/Passes/BinaryPasses.h

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,31 @@ class BinaryFunctionPass {
5353
virtual Error runOnFunctions(BinaryContext &BC) = 0;
5454
};
5555

56+
/// A pass to set initial program-wide dynostats.
57+
class DynoStatsSetPass : public BinaryFunctionPass {
58+
public:
59+
DynoStatsSetPass() : BinaryFunctionPass(false) {}
60+
61+
const char *getName() const override {
62+
return "set dyno-stats before optimizations";
63+
}
64+
65+
bool shouldPrint(const BinaryFunction &BF) const override { return false; }
66+
67+
Error runOnFunctions(BinaryContext &BC) override {
68+
BC.InitialDynoStats = getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
69+
return Error::success();
70+
}
71+
};
72+
5673
/// A pass to print program-wide dynostats.
5774
class DynoStatsPrintPass : public BinaryFunctionPass {
5875
protected:
59-
DynoStats PrevDynoStats;
6076
std::string Title;
6177

6278
public:
63-
DynoStatsPrintPass(const DynoStats &PrevDynoStats, const char *Title)
64-
: BinaryFunctionPass(false), PrevDynoStats(PrevDynoStats), Title(Title) {}
79+
DynoStatsPrintPass(const char *Title)
80+
: BinaryFunctionPass(false), Title(Title) {}
6581

6682
const char *getName() const override {
6783
return "print dyno-stats after optimizations";
@@ -70,6 +86,7 @@ class DynoStatsPrintPass : public BinaryFunctionPass {
7086
bool shouldPrint(const BinaryFunction &BF) const override { return false; }
7187

7288
Error runOnFunctions(BinaryContext &BC) override {
89+
const DynoStats PrevDynoStats = BC.InitialDynoStats;
7390
const DynoStats NewDynoStats =
7491
getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
7592
const bool Changed = (NewDynoStats != PrevDynoStats);

bolt/include/bolt/Passes/MCF.h

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,14 @@
99
#ifndef BOLT_PASSES_MCF_H
1010
#define BOLT_PASSES_MCF_H
1111

12+
#include "bolt/Passes/BinaryPasses.h"
13+
#include "llvm/Support/CommandLine.h"
14+
1215
namespace llvm {
1316
namespace bolt {
1417

15-
class BinaryFunction;
1618
class DataflowInfoManager;
1719

18-
enum MCFCostFunction : char {
19-
MCF_DISABLE = 0,
20-
MCF_LINEAR,
21-
MCF_QUADRATIC,
22-
MCF_LOG,
23-
MCF_BLAMEFTS
24-
};
25-
2620
/// Implement the idea in "SamplePGO - The Power of Profile Guided Optimizations
2721
/// without the Usability Burden" by Diego Novillo to make basic block counts
2822
/// equal if we show that A dominates B, B post-dominates A and they are in the
@@ -31,23 +25,18 @@ void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF);
3125

3226
/// Fill edge counts based on the basic block count. Used in nonLBR mode when
3327
/// we only have bb count.
34-
void estimateEdgeCounts(BinaryFunction &BF);
35-
36-
/// Entry point for computing a min-cost flow for the CFG with the goal
37-
/// of fixing the flow of the CFG edges, that is, making sure it obeys the
38-
/// flow-conservation equation SumInEdges = SumOutEdges.
39-
///
40-
/// To do this, we create an instance of the min-cost flow problem in a
41-
/// similar way as the one discussed in the work of Roy Levin "Completing
42-
/// Incomplete Edge Profile by Applying Minimum Cost Circulation Algorithms".
43-
/// We do a few things differently, though. We don't populate edge counts using
44-
/// weights coming from a static branch prediction technique and we don't
45-
/// use the same cost function.
46-
///
47-
/// If cost function BlameFTs is used, assign all remaining flow to
48-
/// fall-throughs. This is used when the sampling is based on taken branches
49-
/// that do not account for them.
50-
void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction);
28+
class EstimateEdgeCounts : public BinaryFunctionPass {
29+
void runOnFunction(BinaryFunction &BF);
30+
31+
public:
32+
explicit EstimateEdgeCounts(const cl::opt<bool> &PrintPass)
33+
: BinaryFunctionPass(PrintPass) {}
34+
35+
const char *getName() const override { return "estimate-edge-counts"; }
36+
37+
/// Pass entry point
38+
Error runOnFunctions(BinaryContext &BC) override;
39+
};
5140

5241
} // end namespace bolt
5342
} // end namespace llvm

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class BoltAddressTranslation {
9090
std::error_code parse(raw_ostream &OS, StringRef Buf);
9191

9292
/// Dump the parsed address translation tables
93-
void dump(raw_ostream &OS);
93+
void dump(raw_ostream &OS) const;
9494

9595
/// If the maps are loaded in memory, perform the lookup to translate LBR
9696
/// addresses in function located at \p FuncAddress.
@@ -137,7 +137,8 @@ class BoltAddressTranslation {
137137
/// emitted for the start of the BB. More entries may be emitted to cover
138138
/// the location of calls or any instruction that may change control flow.
139139
void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB,
140-
uint64_t FuncInputAddress, uint64_t FuncOutputAddress);
140+
uint64_t FuncInputAddress,
141+
uint64_t FuncOutputAddress) const;
141142

142143
/// Write the serialized address translation table for a function.
143144
template <bool Cold>
@@ -152,7 +153,7 @@ class BoltAddressTranslation {
152153

153154
/// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY
154155
/// entries in function address translation map.
155-
APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems);
156+
APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems) const;
156157

157158
/// Calculate the number of equal offsets (output = input - skew) in the
158159
/// beginning of the function.
@@ -183,14 +184,9 @@ class BoltAddressTranslation {
183184
public:
184185
/// Map basic block input offset to a basic block index and hash pair.
185186
class BBHashMapTy {
186-
class EntryTy {
187+
struct EntryTy {
187188
unsigned Index;
188189
size_t Hash;
189-
190-
public:
191-
unsigned getBBIndex() const { return Index; }
192-
size_t getBBHash() const { return Hash; }
193-
EntryTy(unsigned Index, size_t Hash) : Index(Index), Hash(Hash) {}
194190
};
195191

196192
std::map<uint32_t, EntryTy> Map;
@@ -206,15 +202,15 @@ class BoltAddressTranslation {
206202
}
207203

208204
unsigned getBBIndex(uint32_t BBInputOffset) const {
209-
return getEntry(BBInputOffset).getBBIndex();
205+
return getEntry(BBInputOffset).Index;
210206
}
211207

212208
size_t getBBHash(uint32_t BBInputOffset) const {
213-
return getEntry(BBInputOffset).getBBHash();
209+
return getEntry(BBInputOffset).Hash;
214210
}
215211

216212
void addEntry(uint32_t BBInputOffset, unsigned BBIndex, size_t BBHash) {
217-
Map.emplace(BBInputOffset, EntryTy(BBIndex, BBHash));
213+
Map.emplace(BBInputOffset, EntryTy{BBIndex, BBHash});
218214
}
219215

220216
size_t getNumBasicBlocks() const { return Map.size(); }
@@ -226,14 +222,9 @@ class BoltAddressTranslation {
226222

227223
/// Map function output address to its hash and basic blocks hash map.
228224
class FuncHashesTy {
229-
class EntryTy {
225+
struct EntryTy {
230226
size_t Hash;
231227
BBHashMapTy BBHashMap;
232-
233-
public:
234-
size_t getBFHash() const { return Hash; }
235-
const BBHashMapTy &getBBHashMap() const { return BBHashMap; }
236-
EntryTy(size_t Hash) : Hash(Hash) {}
237228
};
238229

239230
std::unordered_map<uint64_t, EntryTy> Map;
@@ -245,23 +236,23 @@ class BoltAddressTranslation {
245236

246237
public:
247238
size_t getBFHash(uint64_t FuncOutputAddress) const {
248-
return getEntry(FuncOutputAddress).getBFHash();
239+
return getEntry(FuncOutputAddress).Hash;
249240
}
250241

251242
const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
252-
return getEntry(FuncOutputAddress).getBBHashMap();
243+
return getEntry(FuncOutputAddress).BBHashMap;
253244
}
254245

255246
void addEntry(uint64_t FuncOutputAddress, size_t BFHash) {
256-
Map.emplace(FuncOutputAddress, EntryTy(BFHash));
247+
Map.emplace(FuncOutputAddress, EntryTy{BFHash, BBHashMapTy()});
257248
}
258249

259250
size_t getNumFunctions() const { return Map.size(); };
260251

261252
size_t getNumBasicBlocks() const {
262253
size_t NumBasicBlocks{0};
263254
for (auto &I : Map)
264-
NumBasicBlocks += I.second.getBBHashMap().getNumBasicBlocks();
255+
NumBasicBlocks += I.second.BBHashMap.getNumBasicBlocks();
265256
return NumBasicBlocks;
266257
}
267258
};
@@ -283,7 +274,9 @@ class BoltAddressTranslation {
283274

284275
/// Returns the number of basic blocks in a function.
285276
size_t getNumBasicBlocks(uint64_t OutputAddress) const {
286-
return NumBasicBlocksMap.at(OutputAddress);
277+
auto It = NumBasicBlocksMap.find(OutputAddress);
278+
assert(It != NumBasicBlocksMap.end());
279+
return It->second;
287280
}
288281

289282
private:

bolt/lib/Core/BinaryContext.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
142142
AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
143143
InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
144144
MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)),
145-
Logger(Logger) {
145+
Logger(Logger), InitialDynoStats(isAArch64()) {
146146
Relocation::Arch = this->TheTriple->getArch();
147147
RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
148148
PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
@@ -934,10 +934,13 @@ std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
934934
uint64_t Offset = 0;
935935
if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
936936
Offset = Address - JT->getAddress();
937-
auto Itr = JT->Labels.find(Offset);
938-
if (Itr != JT->Labels.end())
939-
return std::string(Itr->second->getName());
940-
Id = JumpTableIds.at(JT->getAddress());
937+
auto JTLabelsIt = JT->Labels.find(Offset);
938+
if (JTLabelsIt != JT->Labels.end())
939+
return std::string(JTLabelsIt->second->getName());
940+
941+
auto JTIdsIt = JumpTableIds.find(JT->getAddress());
942+
assert(JTIdsIt != JumpTableIds.end());
943+
Id = JTIdsIt->second;
941944
} else {
942945
Id = JumpTableIds[Address] = BF.JumpTables.size();
943946
}

bolt/lib/Core/BinaryEmitter.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -813,7 +813,9 @@ void BinaryEmitter::emitJumpTable(const JumpTable &JT, MCSection *HotSection,
813813
// determining its destination.
814814
std::map<MCSymbol *, uint64_t> LabelCounts;
815815
if (opts::JumpTables > JTS_SPLIT && !JT.Counts.empty()) {
816-
MCSymbol *CurrentLabel = JT.Labels.at(0);
816+
auto It = JT.Labels.find(0);
817+
assert(It != JT.Labels.end());
818+
MCSymbol *CurrentLabel = It->second;
817819
uint64_t CurrentLabelCount = 0;
818820
for (unsigned Index = 0; Index < JT.Entries.size(); ++Index) {
819821
auto LI = JT.Labels.find(Index * JT.EntrySize);

bolt/lib/Core/DynoStats.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,9 @@ void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
114114
for (auto &Stat : llvm::reverse(SortedHistogram)) {
115115
OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat.second).data(),
116116
Stat.first * opts::DynoStatsScale);
117-
118-
MaxOpcodeHistogramTy MaxMultiMap = OpcodeHistogram.at(Stat.second).second;
117+
auto It = OpcodeHistogram.find(Stat.second);
118+
assert(It != OpcodeHistogram.end());
119+
MaxOpcodeHistogramTy MaxMultiMap = It->second.second;
119120
// Start with function name:BB offset with highest execution count.
120121
for (auto &Max : llvm::reverse(MaxMultiMap)) {
121122
OS << format(", %'18lld, ", Max.first * opts::DynoStatsScale)

bolt/lib/Passes/BinaryFunctionCallGraph.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
5656
std::stack<NodeId> Worklist;
5757

5858
for (BinaryFunction *Func : Funcs) {
59-
const NodeId Id = FuncToNodeId.at(Func);
59+
auto It = FuncToNodeId.find(Func);
60+
assert(It != FuncToNodeId.end());
61+
const NodeId Id = It->second;
6062
Worklist.push(Id);
6163
NodeStatus[Id] = NEW;
6264
}

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1563,23 +1563,28 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
15631563
const bool Ascending =
15641564
opts::DynoStatsSortOrderOpt == opts::DynoStatsSortOrder::Ascending;
15651565

1566-
if (SortAll) {
1567-
llvm::stable_sort(Functions,
1568-
[Ascending, &Stats](const BinaryFunction *A,
1569-
const BinaryFunction *B) {
1570-
return Ascending ? Stats.at(A) < Stats.at(B)
1571-
: Stats.at(B) < Stats.at(A);
1572-
});
1573-
} else {
1574-
llvm::stable_sort(
1575-
Functions, [Ascending, &Stats](const BinaryFunction *A,
1576-
const BinaryFunction *B) {
1577-
const DynoStats &StatsA = Stats.at(A);
1578-
const DynoStats &StatsB = Stats.at(B);
1579-
return Ascending ? StatsA.lessThan(StatsB, opts::PrintSortedBy)
1580-
: StatsB.lessThan(StatsA, opts::PrintSortedBy);
1581-
});
1582-
}
1566+
std::function<bool(const DynoStats &, const DynoStats &)>
1567+
DynoStatsComparator =
1568+
SortAll ? [](const DynoStats &StatsA,
1569+
const DynoStats &StatsB) { return StatsA < StatsB; }
1570+
: [](const DynoStats &StatsA, const DynoStats &StatsB) {
1571+
return StatsA.lessThan(StatsB, opts::PrintSortedBy);
1572+
};
1573+
1574+
llvm::stable_sort(Functions,
1575+
[Ascending, &Stats, DynoStatsComparator](
1576+
const BinaryFunction *A, const BinaryFunction *B) {
1577+
auto StatsItr = Stats.find(A);
1578+
assert(StatsItr != Stats.end());
1579+
const DynoStats &StatsA = StatsItr->second;
1580+
1581+
StatsItr = Stats.find(B);
1582+
assert(StatsItr != Stats.end());
1583+
const DynoStats &StatsB = StatsItr->second;
1584+
1585+
return Ascending ? DynoStatsComparator(StatsA, StatsB)
1586+
: DynoStatsComparator(StatsB, StatsA);
1587+
});
15831588

15841589
BC.outs() << "BOLT-INFO: top functions sorted by ";
15851590
if (SortAll) {

0 commit comments

Comments
 (0)