Skip to content

Commit 24ba96a

Browse files
SC llvm team authored and committed
Merged main:344228ebf45f9bd1f7626fdcd3c0fada0f0c8385 into amd-gfx:d6bc6d5dbcb2
Local branch amd-gfx d6bc6d5 Merge main into amd-gfx Remote branch main 344228e [BOLT] Drop macro-fusion alignment (llvm#97358)
2 parents d6bc6d5 + 344228e commit 24ba96a

File tree

2,749 files changed

+95144
-36346
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,749 files changed

+95144
-36346
lines changed

.github/CODEOWNERS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ clang/test/AST/Interp/ @tbaederr
120120
/mlir/**/LLVMIR/**/BasicPtxBuilderInterface* @grypp
121121
/mlir/**/NVVM* @grypp
122122

123+
# MLIR Index Dialect
124+
/mlir/**/Index* @mogball
125+
123126
# MLIR Python Bindings
124127
/mlir/test/python/ @ftynse @makslevental @stellaraccident
125128
/mlir/python/ @ftynse @makslevental @stellaraccident

.github/workflows/issue-write.yml

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ on:
55
workflows:
66
- "Check code formatting"
77
- "Check for private emails used in PRs"
8+
- "PR Request Release Note"
89
types:
910
- completed
1011

@@ -17,7 +18,11 @@ jobs:
1718
permissions:
1819
pull-requests: write
1920
if: >
20-
github.event.workflow_run.event == 'pull_request'
21+
github.event.workflow_run.event == 'pull_request' &&
22+
(
23+
github.event.workflow_run.conclusion == 'success' ||
24+
github.event.workflow_run.conclusion == 'failure'
25+
)
2126
steps:
2227
- name: 'Download artifact'
2328
uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1
@@ -92,7 +97,11 @@ jobs:
9297
9398
var pr_number = 0;
9499
gql_result.repository.ref.associatedPullRequests.nodes.forEach((pr) => {
95-
if (pr.baseRepository.owner.login = context.repo.owner && pr.state == 'OPEN') {
100+
101+
// The largest PR number is the one we care about. The only way
102+
// to have more than one associated pull request is if all the
103+
// old pull requests are in the closed state.
104+
if (pr.baseRepository.owner.login = context.repo.owner && pr.number > pr_number) {
96105
pr_number = pr.number;
97106
}
98107
});

.github/workflows/pr-request-release-note.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ name: PR Request Release Note
22

33
permissions:
44
contents: read
5-
pull-requests: write
65

76
on:
87
pull_request:
@@ -41,3 +40,10 @@ jobs:
4140
--token "$GITHUB_TOKEN" \
4241
request-release-note \
4342
--pr-number ${{ github.event.pull_request.number}}
43+
44+
- uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
45+
if: always()
46+
with:
47+
name: workflow-args
48+
path: |
49+
comments

bolt/docs/CommandLineArgumentReference.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,10 @@
259259

260260
Always use long jumps/nops for Linux kernel static keys
261261

262+
- `--match-profile-with-function-hash`
263+
264+
Match profile with function hash
265+
262266
- `--max-data-relocations=<uint>`
263267

264268
Maximum number of data relocations to process

bolt/include/bolt/Core/BinaryBasicBlock.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -842,15 +842,6 @@ class BinaryBasicBlock {
842842
bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
843843
MCInst *&CondBranch, MCInst *&UncondBranch);
844844

845-
/// Return true if iterator \p I is pointing to the first instruction in
846-
/// a pair that could be macro-fused.
847-
bool isMacroOpFusionPair(const_iterator I) const;
848-
849-
/// If the basic block has a pair of instructions suitable for macro-fusion,
850-
/// return iterator to the first instruction of the pair.
851-
/// Otherwise return end().
852-
const_iterator getMacroOpFusionPair() const;
853-
854845
/// Printer required for printing dominator trees.
855846
void printAsOperand(raw_ostream &OS, bool PrintType = true) {
856847
if (PrintType)

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -698,10 +698,6 @@ class BinaryContext {
698698

699699
/// Binary-wide aggregated stats.
700700
struct BinaryStats {
701-
/// Stats for macro-fusion.
702-
uint64_t MissedMacroFusionPairs{0};
703-
uint64_t MissedMacroFusionExecCount{0};
704-
705701
/// Stats for stale profile matching:
706702
/// the total number of basic blocks in the profile
707703
uint32_t NumStaleBlocks{0};

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -835,10 +835,6 @@ class BinaryFunction {
835835
/// them.
836836
void calculateLoopInfo();
837837

838-
/// Calculate missed macro-fusion opportunities and update BinaryContext
839-
/// stats.
840-
void calculateMacroOpFusionStats();
841-
842838
/// Returns if BinaryDominatorTree has been constructed for this function.
843839
bool hasDomTree() const { return BDT != nullptr; }
844840

@@ -930,6 +926,10 @@ class BinaryFunction {
930926
return const_cast<BinaryFunction *>(this)->getInstructionAtOffset(Offset);
931927
}
932928

929+
/// When the function is in disassembled state, return an instruction that
930+
/// contains the \p Offset.
931+
MCInst *getInstructionContainingOffset(uint64_t Offset);
932+
933933
std::optional<MCInst> disassembleInstructionAtOffset(uint64_t Offset) const;
934934

935935
/// Return offset for the first instruction. If there is data at the

bolt/include/bolt/Passes/BinaryFunctionCallGraph.h renamed to bolt/include/bolt/Core/BinaryFunctionCallGraph.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===- bolt/Passes/CallGraph.h ----------------------------------*- C++ -*-===//
1+
//===- bolt/Core/BinaryFunctionCallGraph.h ----------------------*- C++ -*-===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
@@ -9,7 +9,7 @@
99
#ifndef BOLT_PASSES_BINARY_FUNCTION_CALLGRAPH_H
1010
#define BOLT_PASSES_BINARY_FUNCTION_CALLGRAPH_H
1111

12-
#include "bolt/Passes/CallGraph.h"
12+
#include "bolt/Core/CallGraph.h"
1313
#include <deque>
1414
#include <functional>
1515
#include <unordered_map>

bolt/include/bolt/Passes/CallGraph.h renamed to bolt/include/bolt/Core/CallGraph.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===- bolt/Passes/CallGraph.h ----------------------------------*- C++ -*-===//
1+
//===- bolt/Core/CallGraph.h ----------------------------------*- C++ -*-===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.

bolt/include/bolt/Passes/CallGraphWalker.h renamed to bolt/include/bolt/Core/CallGraphWalker.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===- bolt/Passes/CallGraphWalker.h ----------------------------*- C++ -*-===//
1+
//===- bolt/Core/CallGraphWalker.h ----------------------------*- C++ -*-===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,20 @@ class MCPlusBuilder {
439439
}
440440

441441
/// Check whether this conditional branch can be reversed
442-
virtual bool isReversibleBranch(const MCInst &Inst) const { return true; }
442+
virtual bool isReversibleBranch(const MCInst &Inst) const {
443+
assert(!isUnsupportedInstruction(Inst) && isConditionalBranch(Inst) &&
444+
"Instruction is not known conditional branch");
445+
446+
if (isDynamicBranch(Inst))
447+
return false;
448+
return true;
449+
}
450+
451+
/// Return true if this instruction inhibits analysis of the containing
452+
/// function.
453+
virtual bool isUnsupportedInstruction(const MCInst &Inst) const {
454+
return false;
455+
}
443456

444457
/// Return true if the instruction is of pseudo kind.
445458
virtual bool isPseudo(const MCInst &Inst) const {
@@ -917,13 +930,6 @@ class MCPlusBuilder {
917930
/// Return true if the instruction is encoded using EVEX (AVX-512).
918931
virtual bool hasEVEXEncoding(const MCInst &Inst) const { return false; }
919932

920-
/// Return true if a pair of instructions represented by \p Insts
921-
/// could be fused into a single uop.
922-
virtual bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const {
923-
llvm_unreachable("not implemented");
924-
return false;
925-
}
926-
927933
struct X86MemOperand {
928934
unsigned BaseRegNum;
929935
int64_t ScaleImm;

bolt/include/bolt/Passes/HFSort.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
#ifndef BOLT_PASSES_HFSORT_H
2020
#define BOLT_PASSES_HFSORT_H
2121

22-
#include "bolt/Passes/CallGraph.h"
22+
#include "bolt/Core/CallGraph.h"
2323

2424
#include <string>
2525
#include <vector>

bolt/include/bolt/Passes/RegReAssign.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#ifndef BOLT_PASSES_REGREASSIGN_H
1010
#define BOLT_PASSES_REGREASSIGN_H
1111

12-
#include "bolt/Passes/BinaryFunctionCallGraph.h"
12+
#include "bolt/Core/BinaryFunctionCallGraph.h"
1313
#include "bolt/Passes/BinaryPasses.h"
1414
#include "bolt/Passes/RegAnalysis.h"
1515

bolt/include/bolt/Passes/ReorderFunctions.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#ifndef BOLT_PASSES_REORDER_FUNCTIONS_H
1010
#define BOLT_PASSES_REORDER_FUNCTIONS_H
1111

12-
#include "bolt/Passes/BinaryFunctionCallGraph.h"
12+
#include "bolt/Core/BinaryFunctionCallGraph.h"
1313
#include "bolt/Passes/BinaryPasses.h"
1414

1515
namespace llvm {

bolt/include/bolt/Rewrite/DWARFRewriter.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include "llvm/ADT/StringRef.h"
1717
#include "llvm/CodeGen/DIE.h"
1818
#include "llvm/DWP/DWP.h"
19-
#include "llvm/MC/MCAsmLayout.h"
2019
#include "llvm/MC/MCContext.h"
2120
#include "llvm/Support/ToolOutputFile.h"
2221
#include <cstdint>
@@ -183,7 +182,7 @@ class DWARFRewriter {
183182
void updateDebugInfo();
184183

185184
/// Update stmt_list for CUs based on the new .debug_line offsets from \p Asm.
186-
void updateLineTableOffsets(const MCAsmLayout &Layout);
185+
void updateLineTableOffsets(const MCAssembler &Asm);
187186

188187
uint64_t getDwoRangesBase(uint64_t DWOId) { return DwoRangesBase[DWOId]; }
189188

bolt/lib/Core/BinaryBasicBlock.cpp

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -404,45 +404,6 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
404404
CondBranch, UncondBranch);
405405
}
406406

407-
bool BinaryBasicBlock::isMacroOpFusionPair(const_iterator I) const {
408-
auto &MIB = Function->getBinaryContext().MIB;
409-
ArrayRef<MCInst> Insts = Instructions;
410-
return MIB->isMacroOpFusionPair(Insts.slice(I - begin()));
411-
}
412-
413-
BinaryBasicBlock::const_iterator
414-
BinaryBasicBlock::getMacroOpFusionPair() const {
415-
if (!Function->getBinaryContext().isX86())
416-
return end();
417-
418-
if (getNumNonPseudos() < 2 || succ_size() != 2)
419-
return end();
420-
421-
auto RI = getLastNonPseudo();
422-
assert(RI != rend() && "cannot have an empty block with 2 successors");
423-
424-
BinaryContext &BC = Function->getBinaryContext();
425-
426-
// Skip instruction if it's an unconditional branch following
427-
// a conditional one.
428-
if (BC.MIB->isUnconditionalBranch(*RI))
429-
++RI;
430-
431-
if (!BC.MIB->isConditionalBranch(*RI))
432-
return end();
433-
434-
// Start checking with instruction preceding the conditional branch.
435-
++RI;
436-
if (RI == rend())
437-
return end();
438-
439-
auto II = std::prev(RI.base()); // convert to a forward iterator
440-
if (isMacroOpFusionPair(II))
441-
return II;
442-
443-
return end();
444-
}
445-
446407
MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {
447408
BinaryContext &BC = Function->getBinaryContext();
448409
auto Itr = rbegin();

bolt/lib/Core/BinaryContext.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2422,14 +2422,14 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
24222422
// Obtain fragment sizes.
24232423
std::vector<uint64_t> FragmentSizes;
24242424
// Main fragment size.
2425-
const uint64_t HotSize =
2426-
Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2425+
const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) -
2426+
Assembler.getSymbolOffset(*StartLabel);
24272427
FragmentSizes.push_back(HotSize);
24282428
// Split fragment sizes.
24292429
uint64_t ColdSize = 0;
24302430
for (const auto &Labels : SplitLabels) {
2431-
uint64_t Size = Layout.getSymbolOffset(*Labels.second) -
2432-
Layout.getSymbolOffset(*Labels.first);
2431+
uint64_t Size = Assembler.getSymbolOffset(*Labels.second) -
2432+
Assembler.getSymbolOffset(*Labels.first);
24332433
FragmentSizes.push_back(Size);
24342434
ColdSize += Size;
24352435
}
@@ -2439,7 +2439,8 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
24392439
for (FunctionFragment &FF : BF.getLayout().fragments()) {
24402440
BinaryBasicBlock *PrevBB = nullptr;
24412441
for (BinaryBasicBlock *BB : FF) {
2442-
const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel()));
2442+
const uint64_t BBStartOffset =
2443+
Assembler.getSymbolOffset(*(BB->getLabel()));
24432444
BB->setOutputStartAddress(BBStartOffset);
24442445
if (PrevBB)
24452446
PrevBB->setOutputEndAddress(BBStartOffset);

bolt/lib/Core/BinaryEmitter.cpp

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,6 @@ extern cl::opt<bool> PreserveBlocksAlignment;
3838
cl::opt<bool> AlignBlocks("align-blocks", cl::desc("align basic blocks"),
3939
cl::cat(BoltOptCategory));
4040

41-
cl::opt<MacroFusionType>
42-
AlignMacroOpFusion("align-macro-fusion",
43-
cl::desc("fix instruction alignment for macro-fusion (x86 relocation mode)"),
44-
cl::init(MFT_HOT),
45-
cl::values(clEnumValN(MFT_NONE, "none",
46-
"do not insert alignment no-ops for macro-fusion"),
47-
clEnumValN(MFT_HOT, "hot",
48-
"only insert alignment no-ops on hot execution paths (default)"),
49-
clEnumValN(MFT_ALL, "all",
50-
"always align instructions to allow macro-fusion")),
51-
cl::ZeroOrMore,
52-
cl::cat(BoltRelocCategory));
53-
5441
static cl::list<std::string>
5542
BreakFunctionNames("break-funcs",
5643
cl::CommaSeparated,
@@ -453,20 +440,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
453440
Streamer.emitLabel(EntrySymbol);
454441
}
455442

456-
// Check if special alignment for macro-fusion is needed.
457-
bool MayNeedMacroFusionAlignment =
458-
(opts::AlignMacroOpFusion == MFT_ALL) ||
459-
(opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount());
460-
BinaryBasicBlock::const_iterator MacroFusionPair;
461-
if (MayNeedMacroFusionAlignment) {
462-
MacroFusionPair = BB->getMacroOpFusionPair();
463-
if (MacroFusionPair == BB->end())
464-
MayNeedMacroFusionAlignment = false;
465-
}
466-
467443
SMLoc LastLocSeen;
468-
// Remember if the last instruction emitted was a prefix.
469-
bool LastIsPrefix = false;
470444
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
471445
MCInst &Instr = *I;
472446

@@ -479,16 +453,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
479453
continue;
480454
}
481455

482-
// Handle macro-fusion alignment. If we emitted a prefix as
483-
// the last instruction, we should've already emitted the associated
484-
// alignment hint, so don't emit it twice.
485-
if (MayNeedMacroFusionAlignment && !LastIsPrefix &&
486-
I == MacroFusionPair) {
487-
// This assumes the second instruction in the macro-op pair will get
488-
// assigned to its own MCRelaxableFragment. Since all JCC instructions
489-
// are relaxable, we should be safe.
490-
}
491-
492456
if (!EmitCodeOnly) {
493457
// A symbol to be emitted before the instruction to mark its location.
494458
MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr);
@@ -525,7 +489,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
525489
}
526490

527491
Streamer.emitInstruction(Instr, *BC.STI);
528-
LastIsPrefix = BC.MIB->isPrefix(Instr);
529492
}
530493
}
531494

0 commit comments

Comments
 (0)