Skip to content

Commit dd33574

Browse files
committed
[CodeGen][NPM] Support CodeGenSCCOrder in pipeline
pb/codegenscc-order
1 parent 87b4cac commit dd33574

File tree

3 files changed

+219
-16
lines changed

3 files changed

+219
-16
lines changed

llvm/include/llvm/Passes/CodeGenPassBuilder.h

Lines changed: 73 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/ADT/StringRef.h"
1919
#include "llvm/Analysis/AliasAnalysis.h"
2020
#include "llvm/Analysis/BasicAliasAnalysis.h"
21+
#include "llvm/Analysis/CGSCCPassManager.h"
2122
#include "llvm/Analysis/ProfileSummaryInfo.h"
2223
#include "llvm/Analysis/ScopedNoAliasAA.h"
2324
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -210,10 +211,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
210211
class AddIRPass {
211212
public:
212213
AddIRPass(ModulePassManager &MPM, const DerivedT &PB) : MPM(MPM), PB(PB) {}
213-
~AddIRPass() {
214-
if (!FPM.isEmpty())
215-
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
216-
}
214+
~AddIRPass() { flushFPMToMPM(); }
217215

218216
template <typename PassT>
219217
void operator()(PassT &&Pass, StringRef Name = PassT::name()) {
@@ -231,16 +229,40 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
231229
FPM.addPass(std::forward<PassT>(Pass));
232230
} else {
233231
// Add Module Pass
234-
if (!FPM.isEmpty()) {
235-
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
236-
FPM = FunctionPassManager();
237-
}
238-
232+
flushFPMToMPM();
239233
MPM.addPass(std::forward<PassT>(Pass));
240234
}
241235
}
242236

237+
/// Flush pending function passes as-is, then wrap subsequently added function passes in a post-order CGSCC adaptor. No-op if already enabled.
238+
void requireCGSCCOrder() {
239+
if (PB.AddInCGSCCOrder)
240+
return;
241+
flushFPMToMPM();
242+
PB.AddInCGSCCOrder = true;
243+
}
244+
245+
/// Stop wrapping subsequently added function passes in the CGSCC adaptor.
246+
/// Pending passes are flushed in CGSCC order first; passes already added are not removed.
247+
void stopAddingInCGSCCOrder() {
248+
if (!PB.AddInCGSCCOrder)
249+
return;
250+
flushFPMToMPM();
251+
PB.AddInCGSCCOrder = false;
252+
}
253+
243254
private:
255+
void flushFPMToMPM() {
256+
if (!FPM.isEmpty()) {
257+
if (PB.AddInCGSCCOrder) {
258+
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
259+
createCGSCCToFunctionPassAdaptor(std::move(FPM))));
260+
} else {
261+
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
262+
}
263+
FPM = FunctionPassManager();
264+
}
265+
}
244266
ModulePassManager &MPM;
245267
FunctionPassManager FPM;
246268
const DerivedT &PB;
@@ -257,7 +279,11 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
257279
FPM.addPass(
258280
createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
259281
FPM.addPass(InvalidateAnalysisPass<MachineFunctionAnalysis>());
260-
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
282+
if (this->PB.AddInCGSCCOrder) {
283+
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
284+
createCGSCCToFunctionPassAdaptor(std::move(FPM))));
285+
} else
286+
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
261287
}
262288
}
263289

@@ -276,20 +302,47 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
276302
MFPM.addPass(std::forward<PassT>(Pass));
277303
} else {
278304
// Add Module Pass
279-
if (!MFPM.isEmpty()) {
280-
MPM.addPass(createModuleToFunctionPassAdaptor(
281-
createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))));
282-
MFPM = MachineFunctionPassManager();
283-
}
284-
305+
flushMFPMToMPM();
285306
MPM.addPass(std::forward<PassT>(Pass));
286307
}
287308

288309
for (auto &C : PB.AfterCallbacks)
289310
C(Name, MFPM);
290311
}
291312

313+
/// Flush pending machine function passes as-is, then wrap subsequently added machine function passes in a post-order CGSCC adaptor. No-op if already enabled.
314+
void requireCGSCCOrder() {
315+
if (PB.AddInCGSCCOrder)
316+
return;
317+
flushMFPMToMPM();
318+
PB.AddInCGSCCOrder = true;
319+
}
320+
321+
/// Stop wrapping subsequently added machine function passes in the CGSCC adaptor.
322+
/// Pending passes are flushed in CGSCC order first; passes already added are not removed.
323+
void stopAddingInCGSCCOrder() {
324+
if (!PB.AddInCGSCCOrder)
325+
return;
326+
flushMFPMToMPM();
327+
PB.AddInCGSCCOrder = false;
328+
}
329+
292330
private:
331+
void flushMFPMToMPM() {
332+
if (!MFPM.isEmpty()) {
333+
if (PB.AddInCGSCCOrder) {
334+
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
335+
createCGSCCToFunctionPassAdaptor(
336+
createFunctionToMachineFunctionPassAdaptor(
337+
std::move(MFPM)))));
338+
} else {
339+
MPM.addPass(createModuleToFunctionPassAdaptor(
340+
createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))));
341+
}
342+
MFPM = MachineFunctionPassManager();
343+
}
344+
}
345+
293346
ModulePassManager &MPM;
294347
MachineFunctionPassManager MFPM;
295348
const DerivedT &PB;
@@ -555,6 +608,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
555608
/// Helper variable for `-start-before/-start-after/-stop-before/-stop-after`
556609
mutable bool Started = true;
557610
mutable bool Stopped = true;
611+
mutable bool AddInCGSCCOrder = false;
558612
};
559613

560614
template <typename Derived, typename TargetMachineT>
@@ -813,6 +867,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addISelPrepare(
813867
AddIRPass &addPass) const {
814868
derived().addPreISel(addPass);
815869

870+
if (Opt.RequiresCodeGenSCCOrder)
871+
addPass.requireCGSCCOrder();
872+
816873
addPass(CallBrPreparePass());
817874
// Add both the safe stack and the stack protection passes: each of them will
818875
// only protect functions that have corresponding attributes.

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2079,6 +2079,8 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
20792079
// being run on them, which causes crashes in the resource usage analysis).
20802080
addPass(AMDGPULowerBufferFatPointersPass(TM));
20812081

2082+
addPass.requireCGSCCOrder();
2083+
20822084
Base::addCodeGenPrepare(addPass);
20832085

20842086
if (isPassEnabled(EnableLoadStoreVectorizer))
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
; UNSUPPORTED: expensive_checks
2+
; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -disable-verify -print-pipeline-passes < %s 2>&1 \
3+
; RUN: | tr ',' '\n' | FileCheck -check-prefix=GCN-O3 %s
4+
5+
; REQUIRES: asserts
6+
7+
; GCN-O3: require<MachineModuleAnalysis>
8+
; GCN-O3-NEXT: require<profile-summary>
9+
; GCN-O3-NEXT: require<collector-metadata>
10+
; GCN-O3-NEXT: pre-isel-intrinsic-lowering
11+
; GCN-O3-NEXT: function(expand-large-div-rem
12+
; GCN-O3-NEXT: expand-fp)
13+
; GCN-O3-NEXT: amdgpu-remove-incompatible-functions
14+
; GCN-O3-NEXT: amdgpu-printf-runtime-binding
15+
; GCN-O3-NEXT: amdgpu-lower-ctor-dtor
16+
; GCN-O3-NEXT: function(amdgpu-image-intrinsic-opt)
17+
; GCN-O3-NEXT: expand-variadics
18+
; GCN-O3-NEXT: amdgpu-always-inline
19+
; GCN-O3-NEXT: always-inline
20+
; GCN-O3-NEXT: amdgpu-export-kernel-runtime-handles
21+
; GCN-O3-NEXT: amdgpu-sw-lower-lds
22+
; GCN-O3-NEXT: amdgpu-lower-module-lds
23+
; GCN-O3-NEXT: function(infer-address-spaces
24+
; GCN-O3-NEXT: amdgpu-atomic-optimizer
25+
; GCN-O3-NEXT: atomic-expand
26+
; GCN-O3-NEXT: amdgpu-promote-alloca
27+
; GCN-O3-NEXT: separate-const-offset-from-gep<>
28+
; GCN-O3-NEXT: slsr
29+
; GCN-O3-NEXT: gvn<>
30+
; GCN-O3-NEXT: nary-reassociate
31+
; GCN-O3-NEXT: early-cse<>
32+
; GCN-O3-NEXT: amdgpu-codegenprepare
33+
; GCN-O3-NEXT: loop-mssa(loop-reduce)
34+
; GCN-O3-NEXT: mergeicmps
35+
; GCN-O3-NEXT: expand-memcmp
36+
; GCN-O3-NEXT: gc-lowering
37+
; GCN-O3-NEXT: lower-constant-intrinsics
38+
; GCN-O3-NEXT: UnreachableBlockElimPass
39+
; GCN-O3-NEXT: consthoist
40+
; GCN-O3-NEXT: ReplaceWithVeclib
41+
; GCN-O3-NEXT: partially-inline-libcalls
42+
; GCN-O3-NEXT: ee-instrument<post-inline>
43+
; GCN-O3-NEXT: scalarize-masked-mem-intrin
44+
; GCN-O3-NEXT: ExpandReductionsPass
45+
; GCN-O3-NEXT: gvn<>
46+
; GCN-O3-NEXT: amdgpu-lower-kernel-arguments)
47+
; GCN-O3-NEXT: amdgpu-lower-buffer-fat-pointers
48+
; GCN-O3-NEXT: cgscc(function(codegenprepare
49+
; GCN-O3-NEXT: load-store-vectorizer
50+
; GCN-O3-NEXT: lower-switch
51+
; GCN-O3-NEXT: lower-invoke
52+
; GCN-O3-NEXT: UnreachableBlockElimPass
53+
; GCN-O3-NEXT: flatten-cfg
54+
; GCN-O3-NEXT: sink
55+
; GCN-O3-NEXT: amdgpu-late-codegenprepare
56+
; GCN-O3-NEXT: amdgpu-unify-divergent-exit-nodes
57+
; GCN-O3-NEXT: fix-irreducible
58+
; GCN-O3-NEXT: unify-loop-exits
59+
; GCN-O3-NEXT: StructurizeCFGPass
60+
; GCN-O3-NEXT: amdgpu-annotate-uniform
61+
; GCN-O3-NEXT: si-annotate-control-flow
62+
; GCN-O3-NEXT: amdgpu-rewrite-undef-for-phi
63+
; GCN-O3-NEXT: lcssa))
64+
; GCN-O3-NEXT: amdgpu-perf-hint
65+
; GCN-O3-NEXT: cgscc(function(require<uniformity>
66+
; GCN-O3-NEXT: callbr-prepare
67+
; GCN-O3-NEXT: safe-stack
68+
; GCN-O3-NEXT: stack-protector))
69+
; GCN-O3-NEXT: cgscc(function(machine-function(amdgpu-isel
70+
; GCN-O3-NEXT: si-fix-sgpr-copies
71+
; GCN-O3-NEXT: si-i1-copies
72+
; GCN-O3-NEXT: finalize-isel
73+
; GCN-O3-NEXT: early-tailduplication
74+
; GCN-O3-NEXT: opt-phis
75+
; GCN-O3-NEXT: stack-coloring
76+
; GCN-O3-NEXT: localstackalloc
77+
; GCN-O3-NEXT: dead-mi-elimination
78+
; GCN-O3-NEXT: early-machinelicm
79+
; GCN-O3-NEXT: machine-cse
80+
; GCN-O3-NEXT: machine-sink
81+
; GCN-O3-NEXT: peephole-opt
82+
; GCN-O3-NEXT: dead-mi-elimination
83+
; GCN-O3-NEXT: si-fold-operands
84+
; GCN-O3-NEXT: gcn-dpp-combine
85+
; GCN-O3-NEXT: si-load-store-opt
86+
; GCN-O3-NEXT: si-peephole-sdwa
87+
; GCN-O3-NEXT: early-machinelicm
88+
; GCN-O3-NEXT: machine-cse
89+
; GCN-O3-NEXT: si-fold-operands
90+
; GCN-O3-NEXT: dead-mi-elimination
91+
; GCN-O3-NEXT: si-shrink-instructions
92+
; GCN-O3-NEXT: detect-dead-lanes
93+
; GCN-O3-NEXT: InitUndefPass
94+
; GCN-O3-NEXT: ProcessImplicitDefsPass
95+
; GCN-O3-NEXT: unreachable-mbb-elimination
96+
; GCN-O3-NEXT: require<live-vars>
97+
; GCN-O3-NEXT: require<machine-loops>
98+
; GCN-O3-NEXT: phi-node-elimination
99+
; GCN-O3-NEXT: two-address-instruction
100+
; GCN-O3-NEXT: register-coalescer
101+
; GCN-O3-NEXT: rename-independent-subregs
102+
; GCN-O3-NEXT: machine-scheduler
103+
; GCN-O3-NEXT: greedy<all>
104+
; GCN-O3-NEXT: amdgpu-nsa-reassign
105+
; GCN-O3-NEXT: VirtRegRewriterPass
106+
; GCN-O3-NEXT: stack-slot-coloring
107+
; GCN-O3-NEXT: machine-cp
108+
; GCN-O3-NEXT: machinelicm
109+
; GCN-O3-NEXT: si-fix-vgpr-copies
110+
; GCN-O3-NEXT: si-optimize-exec-masking
111+
; GCN-O3-NEXT: remove-redundant-debug-values
112+
; GCN-O3-NEXT: fixup-statepoint-caller-saved
113+
; GCN-O3-NEXT: PostRAMachineSinkingPass
114+
; GCN-O3-NEXT: ShrinkWrapPass
115+
; GCN-O3-NEXT: PrologEpilogInserterPass
116+
; GCN-O3-NEXT: branch-folder
117+
; GCN-O3-NEXT: tailduplication
118+
; GCN-O3-NEXT: machine-latecleanup
119+
; GCN-O3-NEXT: machine-cp
120+
; GCN-O3-NEXT: post-ra-pseudos
121+
; GCN-O3-NEXT: postmisched
122+
; GCN-O3-NEXT: block-placement
123+
; GCN-O3-NEXT: fentry-insert
124+
; GCN-O3-NEXT: xray-instrumentation
125+
; GCN-O3-NEXT: patchable-function
126+
; GCN-O3-NEXT: gcn-create-vopd
127+
; GCN-O3-NEXT: si-memory-legalizer
128+
; GCN-O3-NEXT: si-insert-waitcnts
129+
; GCN-O3-NEXT: si-late-branch-lowering
130+
; GCN-O3-NEXT: si-pre-emit-peephole
131+
; GCN-O3-NEXT: post-RA-hazard-rec
132+
; GCN-O3-NEXT: AMDGPUWaitSGPRHazardsPass
133+
; GCN-O3-NEXT: amdgpu-insert-delay-alu
134+
; GCN-O3-NEXT: branch-relaxation
135+
; GCN-O3-NEXT: remove-loads-into-fake-uses
136+
; GCN-O3-NEXT: live-debug-values
137+
; GCN-O3-NEXT: machine-sanmd
138+
; GCN-O3-NEXT: stack-frame-layout)
139+
; GCN-O3-NEXT: invalidate<machine-function-info>))
140+
141+
142+
define void @empty() {
143+
ret void
144+
}

0 commit comments

Comments
 (0)