Skip to content

Commit 7102251

Browse files
committed
Revert "Revert "AMDGPU: Fix AMDGPUUnifyDivergentExitNodes with no normal returns""
This reverts commit 3d04cee.
1 parent c36d5fd commit 7102251

File tree

3 files changed

+26
-4
lines changed

3 files changed

+26
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,12 @@ static BasicBlock *unifyReturnBlockSet(Function &F,
195195

196196
bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
197197
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
198-
if (PDT.getRoots().size() <= 1)
198+
199+
// If there's only one exit, we don't need to do anything, unless this is a
200+
// pixel shader and that exit is an infinite loop, since we still have to
201+
// insert an export in that case.
202+
if (PDT.getRoots().size() <= 1 &&
203+
F.getCallingConv() != CallingConv::AMDGPU_PS)
199204
return false;
200205

201206
LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();
@@ -321,7 +326,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
321326
if (ReturningBlocks.empty())
322327
return false; // No blocks return
323328

324-
if (ReturningBlocks.size() == 1)
329+
if (ReturningBlocks.size() == 1 && !InsertExport)
325330
return false; // Already has a single return block
326331

327332
const TargetTransformInfo &TTI

llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,22 @@ end:
4545
ret void
4646
}
4747

48+
; Test the case where there's only a kill in an infinite loop.
49+
; CHECK-LABEL: only_kill
50+
; CHECK: exp null off, off, off, off done vm
51+
; CHECK-NEXT: s_endpgm
52+
; SIInsertSkips inserts an extra null export here, but it should be harmless.
53+
; CHECK: exp null off, off, off, off done vm
54+
; CHECK-NEXT: s_endpgm
55+
define amdgpu_ps void @only_kill() #0 {
56+
main_body:
57+
br label %loop
58+
59+
loop:
60+
call void @llvm.amdgcn.kill(i1 false) #3
61+
br label %loop
62+
}
63+
4864
; In case there's an epilog, we shouldn't have to do this.
4965
; CHECK-LABEL: return_nonvoid
5066
; CHECK-NOT: exp null off, off, off, off done vm

llvm/test/CodeGen/AMDGPU/update-phi.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,13 @@ define amdgpu_ps void @_amdgpu_ps_main() local_unnamed_addr #3 {
1414
; IR-NEXT: [[DOT01:%.*]] = phi float [ 0.000000e+00, [[DOTLOOPEXIT]] ], [ [[N29:%.*]], [[TRANSITIONBLOCK:%.*]] ]
1515
; IR-NEXT: [[N29]] = fadd float [[DOT01]], 1.000000e+00
1616
; IR-NEXT: [[N30:%.*]] = fcmp ogt float [[N29]], 4.000000e+00
17-
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK]], label [[DUMMYRETURNBLOCK:%.*]]
17+
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK]], label [[UNIFIEDRETURNBLOCK:%.*]]
1818
; IR: TransitionBlock:
1919
; IR-NEXT: br i1 [[N30]], label [[DOTLOOPEXIT]], label [[N28]]
2020
; IR: n31:
2121
; IR-NEXT: ret void
22-
; IR: DummyReturnBlock:
22+
; IR: UnifiedReturnBlock:
23+
; IR-NEXT: call void @llvm.amdgcn.exp.f32(i32 9, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 true)
2324
; IR-NEXT: ret void
2425
;
2526
.entry:

0 commit comments

Comments
 (0)