Skip to content

Commit 2012b25

Browse files
authored
[AMDGPU][GlobalISel] Disable fixed-point iteration in all Combiners (#105517)
Disable fixed-point iteration in all AMDGPU Combiners after #102163. This saves around 2% compile time in ad hoc testing on some large graphics shaders. I did not notice any regressions in the generated code, just a bunch of harmless differences in instruction selection and register allocation.
1 parent 46707b0 commit 2012b25

File tree

4 files changed: +25 -1 lines changed

llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,11 @@ bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
499499

500500
CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
501501
LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
502-
502+
// Disable fixed-point iteration to reduce compile-time
503+
CInfo.MaxIterations = 1;
504+
CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
505+
// Legalizer performs DCE, so a full DCE pass is unnecessary.
506+
CInfo.EnableFullDCE = false;
503507
AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
504508
RuleConfig, ST, MDT, LI);
505509
return Impl.combineMachineInstrs();

llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,12 @@ bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
276276
: &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
277277
CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
278278
nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
279+
// Disable fixed-point iteration to reduce compile-time
280+
CInfo.MaxIterations = 1;
281+
CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
282+
// This is the first Combiner, so the input IR might contain dead
283+
// instructions.
284+
CInfo.EnableFullDCE = true;
279285
AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig,
280286
STI, MDT, STI.getLegalizerInfo());
281287
return Impl.combineMachineInstrs();

llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,12 @@ bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
454454

455455
CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
456456
LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
457+
// Disable fixed-point iteration to reduce compile-time
458+
CInfo.MaxIterations = 1;
459+
CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
460+
// RegBankSelect seems not to leave dead instructions, so a full DCE pass is
461+
// unnecessary.
462+
CInfo.EnableFullDCE = false;
457463
AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
458464
RuleConfig, ST, MDT, LI);
459465
return Impl.combineMachineInstrs();

llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ body: |
203203
; CHECK-LABEL: name: remove_and_65535_groupstaticsize
204204
; CHECK: liveins: $vgpr0_vgpr1
205205
; CHECK-NEXT: {{ $}}
206+
; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
206207
; CHECK-NEXT: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
207208
; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 65535
208209
; CHECK-NEXT: %and:_(s32) = G_AND %lds_size, %mask
@@ -225,6 +226,7 @@ body: |
225226
; CHECK-LABEL: name: remove_and_131071_groupstaticsize
226227
; CHECK: liveins: $vgpr0_vgpr1
227228
; CHECK-NEXT: {{ $}}
229+
; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
228230
; CHECK-NEXT: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
229231
; CHECK-NEXT: $vgpr0 = COPY %lds_size(s32)
230232
%ptr:_(p1) = COPY $vgpr0_vgpr1
@@ -245,6 +247,7 @@ body: |
245247
; CHECK-LABEL: name: no_remove_and_65536_groupstaticsize
246248
; CHECK: liveins: $vgpr0_vgpr1
247249
; CHECK-NEXT: {{ $}}
250+
; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
248251
; CHECK-NEXT: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
249252
; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 65536
250253
; CHECK-NEXT: %and:_(s32) = G_AND %lds_size, %mask
@@ -267,6 +270,7 @@ body: |
267270
; CHECK-LABEL: name: no_remove_and_32767_groupstaticsize
268271
; CHECK: liveins: $vgpr0_vgpr1
269272
; CHECK-NEXT: {{ $}}
273+
; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
270274
; CHECK-NEXT: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
271275
; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 32767
272276
; CHECK-NEXT: %and:_(s32) = G_AND %lds_size, %mask
@@ -291,6 +295,8 @@ body: |
291295
; CHECK-LABEL: name: remove_and_umin_lhs_only
292296
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4
293297
; CHECK-NEXT: {{ $}}
298+
; CHECK-NEXT: %ptr0:_(p1) = COPY $vgpr0_vgpr1
299+
; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3
294300
; CHECK-NEXT: %val:_(s32) = COPY $vgpr4
295301
; CHECK-NEXT: %k255:_(s32) = G_CONSTANT i32 255
296302
; CHECK-NEXT: %umin0:_(s32) = G_UMIN %val, %k255
@@ -316,6 +322,8 @@ body: |
316322
; CHECK-LABEL: name: remove_and_umin_rhs_only
317323
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4
318324
; CHECK-NEXT: {{ $}}
325+
; CHECK-NEXT: %ptr0:_(p1) = COPY $vgpr0_vgpr1
326+
; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3
319327
; CHECK-NEXT: %val:_(s32) = COPY $vgpr4
320328
; CHECK-NEXT: %k255:_(s32) = G_CONSTANT i32 255
321329
; CHECK-NEXT: %umin0:_(s32) = G_UMIN %val, %k255

0 commit comments

Comments
 (0)