Skip to content

Commit b6c8ff9

Browse files
committed
Revert "AMDGPU: Fix handling of infinite loops in fragment shaders"
This reverts commit 87d98c1.
1 parent 3d04cee commit b6c8ff9

File tree

2 files changed

+6
-141
lines changed

2 files changed

+6
-141
lines changed

llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp

Lines changed: 6 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
#include "llvm/IR/InstrTypes.h"
3535
#include "llvm/IR/Instructions.h"
3636
#include "llvm/IR/Intrinsics.h"
37-
#include "llvm/IR/IRBuilder.h"
3837
#include "llvm/IR/Type.h"
3938
#include "llvm/InitializePasses.h"
4039
#include "llvm/Pass.h"
@@ -118,58 +117,24 @@ static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA,
118117
return true;
119118
}
120119

121-
static void removeDoneExport(Function &F) {
122-
ConstantInt *BoolFalse = ConstantInt::getFalse(F.getContext());
123-
for (BasicBlock &BB : F) {
124-
for (Instruction &I : BB) {
125-
if (IntrinsicInst *Intrin = llvm::dyn_cast<IntrinsicInst>(&I)) {
126-
if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp) {
127-
Intrin->setArgOperand(6, BoolFalse); // done
128-
} else if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp_compr) {
129-
Intrin->setArgOperand(4, BoolFalse); // done
130-
}
131-
}
132-
}
133-
}
134-
}
135-
136120
static BasicBlock *unifyReturnBlockSet(Function &F,
137121
ArrayRef<BasicBlock *> ReturningBlocks,
138-
bool InsertExport,
139122
const TargetTransformInfo &TTI,
140123
StringRef Name) {
141124
// Otherwise, we need to insert a new basic block into the function, add a PHI
142125
// nodes (if the function returns values), and convert all of the return
143126
// instructions into unconditional branches.
144127
BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F);
145-
IRBuilder<> B(NewRetBlock);
146-
147-
if (InsertExport) {
148-
// Ensure that there's only one "done" export in the shader by removing the
149-
// "done" bit set on the original final export. More than one "done" export
150-
// can lead to undefined behavior.
151-
removeDoneExport(F);
152-
153-
Value *Undef = UndefValue::get(B.getFloatTy());
154-
B.CreateIntrinsic(Intrinsic::amdgcn_exp, { B.getFloatTy() },
155-
{
156-
B.getInt32(9), // target, SQ_EXP_NULL
157-
B.getInt32(0), // enabled channels
158-
Undef, Undef, Undef, Undef, // values
159-
B.getTrue(), // done
160-
B.getTrue(), // valid mask
161-
});
162-
}
163128

164129
PHINode *PN = nullptr;
165130
if (F.getReturnType()->isVoidTy()) {
166-
B.CreateRetVoid();
131+
ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
167132
} else {
168133
// If the function doesn't return void... add a PHI node to the block...
169-
PN = B.CreatePHI(F.getReturnType(), ReturningBlocks.size(),
170-
"UnifiedRetVal");
171-
assert(!InsertExport);
172-
B.CreateRet(PN);
134+
PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
135+
"UnifiedRetVal");
136+
NewRetBlock->getInstList().push_back(PN);
137+
ReturnInst::Create(F.getContext(), PN, NewRetBlock);
173138
}
174139

175140
// Loop over all of the blocks, replacing the return instruction with an
@@ -208,8 +173,6 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
208173
// Dummy return block for infinite loop.
209174
BasicBlock *DummyReturnBB = nullptr;
210175

211-
bool InsertExport = false;
212-
213176
for (BasicBlock *BB : PDT.getRoots()) {
214177
if (isa<ReturnInst>(BB->getTerminator())) {
215178
if (!isUniformlyReached(DA, *BB))
@@ -225,36 +188,6 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
225188
"DummyReturnBlock", &F);
226189
Type *RetTy = F.getReturnType();
227190
Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
228-
229-
// For pixel shaders, the producer guarantees that an export is
230-
// executed before each return instruction. However, if there is an
231-
// infinite loop and we insert a return ourselves, we need to uphold
232-
// that guarantee by inserting a null export. This can happen e.g. in
233-
// an infinite loop with kill instructions, which is supposed to
234-
// terminate. However, we don't need to do this if there is a non-void
235-
// return value, since then there is an epilog afterwards which will
236-
// still export.
237-
//
238-
// Note: In the case where only some threads enter the infinite loop,
239-
// this can result in the null export happening redundantly after the
240-
// original exports. However, The last "real" export happens after all
241-
// the threads that didn't enter an infinite loop converged, which
242-
// means that the only extra threads to execute the null export are
243-
// threads that entered the infinite loop, and they only could've
244-
// exited through being killed which sets their exec bit to 0.
245-
// Therefore, unless there's an actual infinite loop, which can have
246-
// invalid results, or there's a kill after the last export, which we
247-
// assume the frontend won't do, this export will have the same exec
248-
// mask as the last "real" export, and therefore the valid mask will be
249-
// overwritten with the same value and will still be correct. Also,
250-
// even though this forces an extra unnecessary export wait, we assume
251-
// that this happens rare enough in practice to that we don't have to
252-
// worry about performance.
253-
if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
254-
RetTy->isVoidTy()) {
255-
InsertExport = true;
256-
}
257-
258191
ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
259192
ReturningBlocks.push_back(DummyReturnBB);
260193
}
@@ -327,6 +260,6 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
327260
const TargetTransformInfo &TTI
328261
= getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
329262

330-
unifyReturnBlockSet(F, ReturningBlocks, InsertExport, TTI, "UnifiedReturnBlock");
263+
unifyReturnBlockSet(F, ReturningBlocks, TTI, "UnifiedReturnBlock");
331264
return true;
332265
}

llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll

Lines changed: 0 additions & 68 deletions
This file was deleted.

0 commit comments

Comments
 (0)