Skip to content

Commit 846f790

Browse files
[GlobalISel] Combiner: Observer-based DCE and retrying of combines
Continues the work for disabling fixed-point iteration in the Combiner (#94291). This introduces improved Observer-based heuristics in the GISel Combiner to retry combining defs/uses of modified instructions and to perform sparse dead code elimination. I have experimented a lot with the heuristics, and this seems to be the minimal set of heuristics that allows disabling fixed-point iteration for AArch64 CTMark O2 without regressions. Enabling this globally would pass all regression tests for all official targets (apart from small benign diffs), but I have made this fully opt-in for now, because I can't quantify the impact for other targets. This should mostly be on par with how the WorkList-aware functions in the InstCombiner and DAGCombiner handle rescheduling instructions for recombining. For performance numbers, see my follow-up patch for AArch64 (#102167). Pull Request: #102163
1 parent f71b638 commit 846f790

File tree

3 files changed

+214
-40
lines changed

3 files changed

+214
-40
lines changed

llvm/include/llvm/CodeGen/GlobalISel/Combiner.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@
1515
#ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_H
1616
#define LLVM_CODEGEN_GLOBALISEL_COMBINER_H
1717

18+
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
1819
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
1920
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
2021
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
2122

2223
namespace llvm {
2324
class MachineRegisterInfo;
24-
struct CombinerInfo;
2525
class GISelCSEInfo;
2626
class TargetPassConfig;
2727
class MachineFunction;
@@ -33,8 +33,12 @@ class MachineIRBuilder;
3333
/// TODO: Is it worth making this module-wide?
3434
class Combiner : public GIMatchTableExecutor {
3535
private:
36+
using WorkListTy = GISelWorkList<512>;
37+
3638
class WorkListMaintainer;
37-
GISelWorkList<512> WorkList;
39+
template <CombinerInfo::ObserverLevel Lvl> class WorkListMaintainerImpl;
40+
41+
WorkListTy WorkList;
3842

3943
// We have a little hack here where we keep the owned pointers private, and only
4044
// expose a reference. This has two purposes:
@@ -48,6 +52,8 @@ class Combiner : public GIMatchTableExecutor {
4852

4953
bool HasSetupMF = false;
5054

55+
static bool tryDCE(MachineInstr &MI, MachineRegisterInfo &MRI);
56+
5157
public:
5258
/// If CSEInfo is not null, then the Combiner will use CSEInfo as the observer
5359
/// and also create a CSEMIRBuilder. Pass nullptr if CSE is not needed.

llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,31 @@ struct CombinerInfo {
5353
/// The maximum number of times the Combiner will iterate over the
5454
/// MachineFunction. Setting this to 0 enables fixed-point iteration.
5555
unsigned MaxIterations = 0;
56+
57+
/// Selects how the Combiner's observer reacts to MIR changes.
/// The enumerators are listed from least to most aggressive. The Combiner
/// compares levels with `>=` (e.g. `Lvl >= Level::SinglePass`,
/// `ObserverLvl >= ObserverLevel::DCE`), so the declaration order is
/// significant and must be preserved when adding new levels.
enum class ObserverLevel {
58+
/// Only retry combining created/changed instructions.
59+
/// This replicates the legacy default Observer behavior for use with
60+
/// fixed-point iteration.
61+
Basic,
62+
/// Enables Observer-based detection of dead instructions. This can save
63+
/// some compile-time if full disabling of fixed-point iteration is not
64+
/// desired. If the input IR doesn't contain dead instructions, consider
65+
/// disabling \p EnableFullDCE.
66+
DCE,
67+
/// Enables Observer-based DCE and additional heuristics that retry
68+
/// combining defined and used instructions of modified instructions.
69+
/// This provides a good balance between compile-time and completeness of
70+
/// combining without needing fixed-point iteration.
71+
SinglePass,
72+
};
73+
74+
/// Select how the Combiner acts on MIR changes.
75+
ObserverLevel ObserverLvl = ObserverLevel::Basic;
76+
77+
/// Whether dead code elimination is performed before each Combiner iteration.
78+
/// If Observer-based DCE is enabled, this controls if a full DCE pass is
79+
/// performed before the first Combiner iteration.
80+
bool EnableFullDCE = true;
5681
};
5782
} // namespace llvm
5883

llvm/lib/CodeGen/GlobalISel/Combiner.cpp

Lines changed: 181 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -45,61 +45,190 @@ cl::OptionCategory GICombinerOptionCategory(
4545
);
4646
} // end namespace llvm
4747

48-
/// This class acts as the glue the joins the CombinerHelper to the overall
48+
/// This class acts as the glue that joins the CombinerHelper to the overall
4949
/// Combine algorithm. The CombinerHelper is intended to report the
5050
/// modifications it makes to the MIR to the GISelChangeObserver and the
51-
/// observer subclass will act on these events. In this case, instruction
52-
/// erasure will cancel any future visits to the erased instruction and
53-
/// instruction creation will schedule that instruction for a future visit.
54-
/// Other Combiner implementations may require more complex behaviour from
55-
/// their GISelChangeObserver subclass.
51+
/// observer subclass will act on these events.
5652
class Combiner::WorkListMaintainer : public GISelChangeObserver {
57-
using WorkListTy = GISelWorkList<512>;
58-
WorkListTy &WorkList;
53+
protected:
54+
#ifndef NDEBUG
5955
/// The instructions that have been created but we want to report once they
6056
/// have their operands. This is only maintained if debug output is requested.
61-
#ifndef NDEBUG
62-
SetVector<const MachineInstr *> CreatedInstrs;
57+
SmallSetVector<const MachineInstr *, 32> CreatedInstrs;
6358
#endif
59+
using Level = CombinerInfo::ObserverLevel;
6460

6561
public:
66-
WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {}
62+
static std::unique_ptr<WorkListMaintainer>
63+
create(Level Lvl, WorkListTy &WorkList, MachineRegisterInfo &MRI);
64+
6765
virtual ~WorkListMaintainer() = default;
6866

67+
/// Prints "Created: " followed by each instruction recorded in CreatedInstrs
/// to dbgs(), then empties CreatedInstrs.
/// The entire body is wrapped in LLVM_DEBUG, matching the fact that
/// CreatedInstrs is only declared under `#ifndef NDEBUG`.
void reportFullyCreatedInstrs() {
68+
LLVM_DEBUG({
69+
for (auto *MI : CreatedInstrs) {
70+
dbgs() << "Created: " << *MI;
71+
}
72+
CreatedInstrs.clear();
73+
});
74+
}
75+
76+
virtual void reset() = 0;
77+
virtual void appliedCombine() = 0;
78+
};
79+
80+
/// A configurable WorkListMaintainer implementation.
81+
/// The ObserverLevel determines how the WorkListMaintainer reacts to MIR
82+
/// changes.
83+
template <CombinerInfo::ObserverLevel Lvl>
84+
class Combiner::WorkListMaintainerImpl : public Combiner::WorkListMaintainer {
85+
WorkListTy &WorkList;
86+
MachineRegisterInfo &MRI;
87+
88+
// Defer handling these instructions until the combine finishes.
89+
SmallSetVector<MachineInstr *, 32> DeferList;
90+
91+
// Track VRegs that (might) have lost a use.
92+
SmallSetVector<Register, 32> LostUses;
93+
94+
public:
95+
WorkListMaintainerImpl(WorkListTy &WorkList, MachineRegisterInfo &MRI)
96+
: WorkList(WorkList), MRI(MRI) {}
97+
98+
virtual ~WorkListMaintainerImpl() = default;
99+
100+
/// Drops all pending state of this observer: both the deferred-instruction
/// list and the set of registers that may have lost a use are emptied.
/// combineMachineInstrs() calls this next to WorkList.clear() at the start of
/// each Combiner iteration.
void reset() override {
101+
DeferList.clear();
102+
LostUses.clear();
103+
}
104+
69105
void erasingInstr(MachineInstr &MI) override {
70-
LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n");
106+
// MI will become dangling, remove it from all lists.
107+
LLVM_DEBUG(dbgs() << "Erasing: " << MI; CreatedInstrs.remove(&MI));
71108
WorkList.remove(&MI);
109+
if constexpr (Lvl != Level::Basic) {
110+
DeferList.remove(&MI);
111+
noteLostUses(MI);
112+
}
72113
}
114+
73115
void createdInstr(MachineInstr &MI) override {
74-
LLVM_DEBUG(dbgs() << "Creating: " << MI << "\n");
75-
WorkList.insert(&MI);
76-
LLVM_DEBUG(CreatedInstrs.insert(&MI));
116+
LLVM_DEBUG(dbgs() << "Creating: " << MI; CreatedInstrs.insert(&MI));
117+
if constexpr (Lvl == Level::Basic)
118+
WorkList.insert(&MI);
119+
else
120+
// Defer handling newly created instructions, because they don't have
121+
// operands yet. We also insert them into the WorkList in reverse
122+
// order so that they will be combined top down.
123+
DeferList.insert(&MI);
77124
}
125+
78126
void changingInstr(MachineInstr &MI) override {
79-
LLVM_DEBUG(dbgs() << "Changing: " << MI << "\n");
80-
WorkList.insert(&MI);
127+
LLVM_DEBUG(dbgs() << "Changing: " << MI);
128+
// Some uses might get dropped when MI is changed.
129+
// For now, overapproximate by assuming all uses will be dropped.
130+
// TODO: Is a more precise heuristic or manual tracking of use count
131+
// decrements worth it?
132+
if constexpr (Lvl != Level::Basic)
133+
noteLostUses(MI);
81134
}
135+
82136
void changedInstr(MachineInstr &MI) override {
83-
LLVM_DEBUG(dbgs() << "Changed: " << MI << "\n");
84-
WorkList.insert(&MI);
137+
LLVM_DEBUG(dbgs() << "Changed: " << MI);
138+
if constexpr (Lvl == Level::Basic)
139+
WorkList.insert(&MI);
140+
else
141+
// Defer this for DCE
142+
DeferList.insert(&MI);
85143
}
86144

87-
void reportFullyCreatedInstrs() {
88-
LLVM_DEBUG(for (const auto *MI
89-
: CreatedInstrs) {
90-
dbgs() << "Created: ";
91-
MI->print(dbgs());
92-
});
93-
LLVM_DEBUG(CreatedInstrs.clear());
145+
// Only track changes during the combine and then walk the def/use-chains once
146+
// the combine is finished, because:
147+
// - instructions might have multiple defs during the combine.
148+
// - use counts aren't accurate during the combine.
149+
/// Post-processes the changes recorded while a combine was being applied.
/// The Combiner main loop calls this only when tryCombineAll() reported that
/// a combine was applied. It first drains DeferList (created/changed
/// instructions), then drains LostUses (registers that may have lost a use).
/// In both phases an instruction is erased if tryDCE() succeeds; otherwise it
/// may be (re)inserted into the WorkList depending on the ObserverLevel.
void appliedCombine() override {
150+
// At Level::Basic the observer callbacks never fill DeferList or LostUses,
// so there is nothing to process.
if constexpr (Lvl == Level::Basic)
151+
return;
152+
153+
// DCE deferred instructions and add them to the WorkList bottom up.
154+
while (!DeferList.empty()) {
155+
MachineInstr &MI = *DeferList.pop_back_val();
156+
// If tryDCE() returns true, MI has been erased and must not be touched.
if (tryDCE(MI, MRI))
157+
continue;
158+
159+
// Only the SinglePass level also reschedules the users of MI's defs.
if constexpr (Lvl >= Level::SinglePass)
160+
addUsersToWorkList(MI);
161+
162+
WorkList.insert(&MI);
163+
}
164+
165+
// Handle instructions that have lost a user.
// Naming note: `Use` is a register that was recorded as a (possibly dropped)
// use operand, and `UseMI` is the instruction returned by getVRegDef() for
// that register, i.e. its defining instruction, not one of its users.
166+
while (!LostUses.empty()) {
167+
Register Use = LostUses.pop_back_val();
168+
MachineInstr *UseMI = MRI.getVRegDef(Use);
169+
// No defining instruction found for this register; nothing to do.
if (!UseMI)
170+
continue;
171+
172+
// If DCE succeeds, UseMI's uses are added back to LostUses by
173+
// erasingInstr.
174+
if (tryDCE(*UseMI, MRI))
175+
continue;
176+
177+
// At Level::DCE this loop only attempts DCE; nothing is rescheduled.
if constexpr (Lvl >= Level::SinglePass) {
178+
// OneUse checks are relatively common, so we might be able to combine
179+
// the single remaining user of this Reg.
180+
if (MRI.hasOneNonDBGUser(Use))
181+
WorkList.insert(&*MRI.use_instr_nodbg_begin(Use));
182+
183+
WorkList.insert(UseMI);
184+
}
185+
}
186+
}
187+
188+
/// Records every virtual register that appears as an explicit use operand of
/// \p MI in LostUses. Operands that are not registers, and registers that are
/// not virtual, are skipped. LostUses is a set, so a register that is already
/// recorded is not added twice.
void noteLostUses(MachineInstr &MI) {
189+
for (auto &Use : MI.explicit_uses()) {
190+
if (!Use.isReg() || !Use.getReg().isVirtual())
191+
continue;
192+
LostUses.insert(Use.getReg());
193+
}
194+
}
195+
196+
/// Inserts into the WorkList every non-debug instruction that uses a virtual
/// register defined by \p MI. Defs that are not virtual registers are skipped.
void addUsersToWorkList(MachineInstr &MI) {
197+
for (auto &Def : MI.defs()) {
198+
Register DefReg = Def.getReg();
199+
if (!DefReg.isVirtual())
200+
continue;
201+
for (auto &UseMI : MRI.use_nodbg_instructions(DefReg)) {
202+
WorkList.insert(&UseMI);
203+
}
204+
}
94205
}
95206
};
96207

208+
/// Factory that maps the runtime ObserverLevel \p Lvl to the matching
/// WorkListMaintainerImpl<Lvl> instantiation, forwarding \p WorkList and
/// \p MRI to its constructor.
/// \returns an owning pointer to the newly created observer.
// NOTE(review): the switch below has a case for Basic, DCE and SinglePass,
// which are all enumerators of ObserverLevel. LLVM's coding standards
// discourage a `default:` label in a fully covered enum switch (it triggers
// -Wcovered-switch-default and hides missing cases when an enumerator is
// added); consider moving the llvm_unreachable after the switch instead.
std::unique_ptr<Combiner::WorkListMaintainer>
209+
Combiner::WorkListMaintainer::create(Level Lvl, WorkListTy &WorkList,
210+
MachineRegisterInfo &MRI) {
211+
switch (Lvl) {
212+
case Level::Basic:
213+
return std::make_unique<WorkListMaintainerImpl<Level::Basic>>(WorkList,
214+
MRI);
215+
case Level::DCE:
216+
return std::make_unique<WorkListMaintainerImpl<Level::DCE>>(WorkList, MRI);
217+
case Level::SinglePass:
218+
return std::make_unique<WorkListMaintainerImpl<Level::SinglePass>>(WorkList,
219+
MRI);
220+
default:
221+
llvm_unreachable("Illegal ObserverLevel");
222+
}
223+
}
224+
97225
Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo,
98226
const TargetPassConfig *TPC, GISelKnownBits *KB,
99227
GISelCSEInfo *CSEInfo)
100228
: Builder(CSEInfo ? std::make_unique<CSEMIRBuilder>()
101229
: std::make_unique<MachineIRBuilder>()),
102-
WLObserver(std::make_unique<WorkListMaintainer>(WorkList)),
230+
WLObserver(WorkListMaintainer::create(CInfo.ObserverLvl, WorkList,
231+
MF.getRegInfo())),
103232
ObserverWrapper(std::make_unique<GISelObserverWrapper>()), CInfo(CInfo),
104233
Observer(*ObserverWrapper), B(*Builder), MF(MF), MRI(MF.getRegInfo()),
105234
KB(KB), TPC(TPC), CSEInfo(CSEInfo) {
@@ -115,6 +244,15 @@ Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo,
115244

116245
Combiner::~Combiner() = default;
117246

247+
/// Erases \p MI if isTriviallyDead(MI, MRI) reports it as dead.
/// Before erasing, llvm::salvageDebugInfo() is called for the instruction.
/// \returns true if \p MI was erased (the reference is dangling afterwards),
/// false if the instruction was left untouched.
bool Combiner::tryDCE(MachineInstr &MI, MachineRegisterInfo &MRI) {
248+
if (!isTriviallyDead(MI, MRI))
249+
return false;
250+
LLVM_DEBUG(dbgs() << "Dead: " << MI);
251+
llvm::salvageDebugInfo(MRI, MI);
252+
MI.eraseFromParent();
253+
return true;
254+
}
255+
118256
bool Combiner::combineMachineInstrs() {
119257
// If the ISel pipeline failed, do not bother running this pass.
120258
// FIXME: Should this be here or in individual combiner passes?
@@ -141,27 +279,29 @@ bool Combiner::combineMachineInstrs() {
141279
++Iteration;
142280
LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n');
143281

282+
Changed = false;
144283
WorkList.clear();
284+
WLObserver->reset();
145285
ObserverWrapper->clearObservers();
146286
if (CSEInfo)
147287
ObserverWrapper->addObserver(CSEInfo);
148288

289+
// If Observer-based DCE is enabled, perform full DCE only before the first
290+
// iteration.
291+
bool EnableDCE = CInfo.ObserverLvl >= CombinerInfo::ObserverLevel::DCE
292+
? CInfo.EnableFullDCE && Iteration == 1
293+
: CInfo.EnableFullDCE;
294+
149295
// Collect all instructions. Do a post order traversal for basic blocks and
150296
// insert with list bottom up, so while we pop_back_val, we'll traverse top
151297
// down RPOT.
152-
Changed = false;
153-
154298
RAIIMFObsDelInstaller DelInstall(MF, *ObserverWrapper);
155299
for (MachineBasicBlock *MBB : post_order(&MF)) {
156300
for (MachineInstr &CurMI :
157301
llvm::make_early_inc_range(llvm::reverse(*MBB))) {
158302
// Erase dead insts before even adding to the list.
159-
if (isTriviallyDead(CurMI, MRI)) {
160-
LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
161-
llvm::salvageDebugInfo(MRI, CurMI);
162-
CurMI.eraseFromParent();
303+
if (EnableDCE && tryDCE(CurMI, MRI))
163304
continue;
164-
}
165305
WorkList.deferred_insert(&CurMI);
166306
}
167307
}
@@ -171,10 +311,13 @@ bool Combiner::combineMachineInstrs() {
171311
ObserverWrapper->addObserver(WLObserver.get());
172312
// Main Loop. Process the instructions here.
173313
while (!WorkList.empty()) {
174-
MachineInstr *CurrInst = WorkList.pop_back_val();
175-
LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;);
176-
Changed |= tryCombineAll(*CurrInst);
177-
WLObserver->reportFullyCreatedInstrs();
314+
MachineInstr &CurrInst = *WorkList.pop_back_val();
315+
LLVM_DEBUG(dbgs() << "\nTry combining " << CurrInst);
316+
bool AppliedCombine = tryCombineAll(CurrInst);
317+
LLVM_DEBUG(WLObserver->reportFullyCreatedInstrs());
318+
Changed |= AppliedCombine;
319+
if (AppliedCombine)
320+
WLObserver->appliedCombine();
178321
}
179322
MFChanged |= Changed;
180323

0 commit comments

Comments
 (0)