Skip to content

[BOLT] Drop macro-fusion alignment #97358

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions bolt/include/bolt/Core/BinaryBasicBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -842,15 +842,6 @@ class BinaryBasicBlock {
bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
MCInst *&CondBranch, MCInst *&UncondBranch);

/// Return true if iterator \p I is pointing to the first instruction in
/// a pair that could be macro-fused.
bool isMacroOpFusionPair(const_iterator I) const;

/// If the basic block has a pair of instructions suitable for macro-fusion,
/// return iterator to the first instruction of the pair.
/// Otherwise return end().
const_iterator getMacroOpFusionPair() const;

/// Printer required for printing dominator trees.
void printAsOperand(raw_ostream &OS, bool PrintType = true) {
if (PrintType)
Expand Down
4 changes: 0 additions & 4 deletions bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -698,10 +698,6 @@ class BinaryContext {

/// Binary-wide aggregated stats.
struct BinaryStats {
/// Stats for macro-fusion.
uint64_t MissedMacroFusionPairs{0};
uint64_t MissedMacroFusionExecCount{0};

/// Stats for stale profile matching:
/// the total number of basic blocks in the profile
uint32_t NumStaleBlocks{0};
Expand Down
4 changes: 0 additions & 4 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -835,10 +835,6 @@ class BinaryFunction {
/// them.
void calculateLoopInfo();

/// Calculate missed macro-fusion opportunities and update BinaryContext
/// stats.
void calculateMacroOpFusionStats();

/// Returns if BinaryDominatorTree has been constructed for this function.
bool hasDomTree() const { return BDT != nullptr; }

Expand Down
7 changes: 0 additions & 7 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -917,13 +917,6 @@ class MCPlusBuilder {
/// Return true if the instruction is encoded using EVEX (AVX-512).
virtual bool hasEVEXEncoding(const MCInst &Inst) const { return false; }

/// Return true if a pair of instructions represented by \p Insts
/// could be fused into a single uop.
virtual bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const {
llvm_unreachable("not implemented");
return false;
}

struct X86MemOperand {
unsigned BaseRegNum;
int64_t ScaleImm;
Expand Down
39 changes: 0 additions & 39 deletions bolt/lib/Core/BinaryBasicBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,45 +404,6 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
CondBranch, UncondBranch);
}

bool BinaryBasicBlock::isMacroOpFusionPair(const_iterator I) const {
auto &MIB = Function->getBinaryContext().MIB;
ArrayRef<MCInst> Insts = Instructions;
return MIB->isMacroOpFusionPair(Insts.slice(I - begin()));
}

BinaryBasicBlock::const_iterator
BinaryBasicBlock::getMacroOpFusionPair() const {
if (!Function->getBinaryContext().isX86())
return end();

if (getNumNonPseudos() < 2 || succ_size() != 2)
return end();

auto RI = getLastNonPseudo();
assert(RI != rend() && "cannot have an empty block with 2 successors");

BinaryContext &BC = Function->getBinaryContext();

// Skip instruction if it's an unconditional branch following
// a conditional one.
if (BC.MIB->isUnconditionalBranch(*RI))
++RI;

if (!BC.MIB->isConditionalBranch(*RI))
return end();

// Start checking with instruction preceding the conditional branch.
++RI;
if (RI == rend())
return end();

auto II = std::prev(RI.base()); // convert to a forward iterator
if (isMacroOpFusionPair(II))
return II;

return end();
}

MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {
BinaryContext &BC = Function->getBinaryContext();
auto Itr = rbegin();
Expand Down
37 changes: 0 additions & 37 deletions bolt/lib/Core/BinaryEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,6 @@ extern cl::opt<bool> PreserveBlocksAlignment;
cl::opt<bool> AlignBlocks("align-blocks", cl::desc("align basic blocks"),
cl::cat(BoltOptCategory));

cl::opt<MacroFusionType>
AlignMacroOpFusion("align-macro-fusion",
cl::desc("fix instruction alignment for macro-fusion (x86 relocation mode)"),
cl::init(MFT_HOT),
cl::values(clEnumValN(MFT_NONE, "none",
"do not insert alignment no-ops for macro-fusion"),
clEnumValN(MFT_HOT, "hot",
"only insert alignment no-ops on hot execution paths (default)"),
clEnumValN(MFT_ALL, "all",
"always align instructions to allow macro-fusion")),
cl::ZeroOrMore,
cl::cat(BoltRelocCategory));

static cl::list<std::string>
BreakFunctionNames("break-funcs",
cl::CommaSeparated,
Expand Down Expand Up @@ -453,20 +440,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
Streamer.emitLabel(EntrySymbol);
}

// Check if special alignment for macro-fusion is needed.
bool MayNeedMacroFusionAlignment =
(opts::AlignMacroOpFusion == MFT_ALL) ||
(opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount());
BinaryBasicBlock::const_iterator MacroFusionPair;
if (MayNeedMacroFusionAlignment) {
MacroFusionPair = BB->getMacroOpFusionPair();
if (MacroFusionPair == BB->end())
MayNeedMacroFusionAlignment = false;
}

SMLoc LastLocSeen;
// Remember if the last instruction emitted was a prefix.
bool LastIsPrefix = false;
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
MCInst &Instr = *I;

Expand All @@ -479,16 +453,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
continue;
}

// Handle macro-fusion alignment. If we emitted a prefix as
// the last instruction, we should've already emitted the associated
// alignment hint, so don't emit it twice.
if (MayNeedMacroFusionAlignment && !LastIsPrefix &&
I == MacroFusionPair) {
// This assumes the second instruction in the macro-op pair will get
// assigned to its own MCRelaxableFragment. Since all JCC instructions
// are relaxable, we should be safe.
}

if (!EmitCodeOnly) {
// A symbol to be emitted before the instruction to mark its location.
MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr);
Expand Down Expand Up @@ -525,7 +489,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
}

Streamer.emitInstruction(Instr, *BC.STI);
LastIsPrefix = BC.MIB->isPrefix(Instr);
}
}

Expand Down
25 changes: 0 additions & 25 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2276,8 +2276,6 @@ void BinaryFunction::postProcessCFG() {
postProcessBranches();
}

calculateMacroOpFusionStats();

// The final cleanup of intermediate structures.
clearList(IgnoredBranches);

Expand All @@ -2294,29 +2292,6 @@ void BinaryFunction::postProcessCFG() {
"invalid CFG detected after post-processing");
}

void BinaryFunction::calculateMacroOpFusionStats() {
if (!getBinaryContext().isX86())
return;
for (const BinaryBasicBlock &BB : blocks()) {
auto II = BB.getMacroOpFusionPair();
if (II == BB.end())
continue;

// Check offset of the second instruction.
// FIXME: arch-specific.
const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0);
if (!Offset || (getAddress() + Offset) % 64)
continue;

LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
<< Twine::utohexstr(getAddress() + Offset)
<< " in function " << *this << "; executed "
<< BB.getKnownExecutionCount() << " times.\n");
++BC.Stats.MissedMacroFusionPairs;
BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
}
}

void BinaryFunction::removeTagsFromProfile() {
for (BinaryBasicBlock *BB : BasicBlocks) {
if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
Expand Down
20 changes: 0 additions & 20 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ namespace opts {
extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltOptCategory;

extern cl::opt<bolt::MacroFusionType> AlignMacroOpFusion;
extern cl::opt<unsigned> Verbosity;
extern cl::opt<bool> EnableBAT;
extern cl::opt<unsigned> ExecutionCountThreshold;
Expand Down Expand Up @@ -1635,25 +1634,6 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
}
}

// Print information on missed macro-fusion opportunities seen on input.
if (BC.Stats.MissedMacroFusionPairs) {
BC.outs() << format(
"BOLT-INFO: the input contains %zu (dynamic count : %zu)"
" opportunities for macro-fusion optimization",
BC.Stats.MissedMacroFusionPairs, BC.Stats.MissedMacroFusionExecCount);
switch (opts::AlignMacroOpFusion) {
case MFT_NONE:
BC.outs() << ". Use -align-macro-fusion to fix.\n";
break;
case MFT_HOT:
BC.outs() << ". Will fix instances on a hot path.\n";
break;
case MFT_ALL:
BC.outs() << " that are going to be fixed\n";
break;
}
}

// Collect and print information about suboptimal code layout on input.
if (opts::ReportBadLayout) {
std::vector<BinaryFunction *> SuboptimalFuncs;
Expand Down
22 changes: 0 additions & 22 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;

namespace opts {

extern cl::opt<MacroFusionType> AlignMacroOpFusion;
extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
Expand Down Expand Up @@ -1972,12 +1971,6 @@ void RewriteInstance::adjustCommandLineOptions() {
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
RtLibrary->adjustCommandLineOptions(*BC);

if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
BC->outs()
<< "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
opts::AlignMacroOpFusion = MFT_NONE;
}

if (BC->isX86() && BC->MAB->allowAutoPadding()) {
if (!BC->HasRelocations) {
BC->errs()
Expand All @@ -1988,13 +1981,6 @@ void RewriteInstance::adjustCommandLineOptions() {
BC->outs()
<< "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
"may take several minutes\n";
opts::AlignMacroOpFusion = MFT_NONE;
}

if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) {
BC->outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
"mode\n";
opts::AlignMacroOpFusion = MFT_NONE;
}

if (opts::SplitEH && !BC->HasRelocations) {
Expand All @@ -2016,14 +2002,6 @@ void RewriteInstance::adjustCommandLineOptions() {
opts::StrictMode = true;
}

if (BC->isX86() && BC->HasRelocations &&
opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) {
BC->outs()
<< "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
"was specified\n";
opts::AlignMacroOpFusion = MFT_ALL;
}

if (!BC->HasRelocations &&
opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
BC->errs() << "BOLT-ERROR: function reordering only works when "
Expand Down
4 changes: 0 additions & 4 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
*AArch64ExprB.getSubExpr(), Comp);
}

bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
return false;
}

bool shortenInstruction(MCInst &, const MCSubtargetInfo &) const override {
return false;
}
Expand Down
34 changes: 0 additions & 34 deletions bolt/lib/Target/X86/X86MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -661,40 +661,6 @@ class X86MCPlusBuilder : public MCPlusBuilder {
return (Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX;
}

bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
const auto *I = Insts.begin();
while (I != Insts.end() && isPrefix(*I))
++I;
if (I == Insts.end())
return false;

const MCInst &FirstInst = *I;
++I;
while (I != Insts.end() && isPrefix(*I))
++I;
if (I == Insts.end())
return false;
const MCInst &SecondInst = *I;

if (!isConditionalBranch(SecondInst))
return false;
// Cannot fuse if the first instruction uses RIP-relative memory.
if (hasPCRelOperand(FirstInst))
return false;

const X86::FirstMacroFusionInstKind CmpKind =
X86::classifyFirstOpcodeInMacroFusion(FirstInst.getOpcode());
if (CmpKind == X86::FirstMacroFusionInstKind::Invalid)
return false;

X86::CondCode CC = static_cast<X86::CondCode>(getCondCode(SecondInst));
X86::SecondMacroFusionInstKind BranchKind =
X86::classifySecondCondCodeInMacroFusion(CC);
if (BranchKind == X86::SecondMacroFusionInstKind::Invalid)
return false;
return X86::isMacroFused(CmpKind, BranchKind);
}

std::optional<X86MemOperand>
evaluateX86MemoryOperand(const MCInst &Inst) const override {
int MemOpNo = getMemoryOperandNo(Inst);
Expand Down
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.