-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[LoopUnroll] Consider convergence control tokens when unrolling #91715
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-openmp @llvm/pr-subscribers-llvm-transforms Author: Sameer Sahasrabuddhe (ssahasra) Changes
Original implementation [D85605] by Nicolai Haehnle <[email protected]>. Patch is 40.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91715.diff 12 Files Affected:
diff --git a/llvm/include/llvm/Analysis/CodeMetrics.h b/llvm/include/llvm/Analysis/CodeMetrics.h
index a9431bca11251..97f24f6159896 100644
--- a/llvm/include/llvm/Analysis/CodeMetrics.h
+++ b/llvm/include/llvm/Analysis/CodeMetrics.h
@@ -20,6 +20,7 @@
namespace llvm {
class AssumptionCache;
class BasicBlock;
+class Instruction;
class Loop;
class Function;
template <class T> class SmallPtrSetImpl;
@@ -45,6 +46,9 @@ struct CodeMetrics {
/// True if this function contains a call to a convergent function.
bool convergent = false;
+ /// True if the code contains an uncontrolled convergent operation.
+ bool convergentUncontrolled = false;
+
/// True if this function calls alloca (in the C sense).
bool usesDynamicAlloca = false;
@@ -77,7 +81,7 @@ struct CodeMetrics {
/// Add information about a block to the current state.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues,
- bool PrepareForLTO = false);
+ bool PrepareForLTO = false, const Loop *L = nullptr);
/// Collect a loop's ephemeral values (those used only by an assume
/// or similar intrinsics in the loop).
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 52084630560c5..4f06a7e889f91 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -649,6 +649,9 @@ int getIntLoopAttribute(const Loop *TheLoop, StringRef Name, int Default = 0);
std::optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
StringRef Name);
+/// Find the convergence heart of the loop.
+CallBase *getLoopConvergenceHeart(const Loop *TheLoop);
+
/// Look for the loop attribute that requires progress within the loop.
/// Note: Most consumers probably want "isMustProgress" which checks
/// the containing function attribute too.
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 9dd1bb455a718..441e6a1e79843 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1588,6 +1588,14 @@ class CallBase : public Instruction {
static CallBase *removeOperandBundle(CallBase *CB, uint32_t ID,
BasicBlock::iterator InsertPt);
+ /// Return the convergence control token for this call, if it exists.
+ Value *getConvergenceControlToken() const {
+ if (auto Bundle = getOperandBundle(llvm::LLVMContext::OB_convergencectrl)) {
+ return Bundle->Inputs[0].get();
+ }
+ return nullptr;
+ }
+
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Call ||
I->getOpcode() == Instruction::Invoke ||
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index fcd3a1025ac13..9010e1a1c896b 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1799,17 +1799,14 @@ class ConvergenceControlInst : public IntrinsicInst {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
- // Returns the convergence intrinsic referenced by |I|'s convergencectrl
- // attribute if any.
- static IntrinsicInst *getParentConvergenceToken(Instruction *I) {
- auto *CI = dyn_cast<llvm::CallInst>(I);
- if (!CI)
- return nullptr;
-
- auto Bundle = CI->getOperandBundle(llvm::LLVMContext::OB_convergencectrl);
- assert(Bundle->Inputs.size() == 1 &&
- Bundle->Inputs[0]->getType()->isTokenTy());
- return dyn_cast<llvm::IntrinsicInst>(Bundle->Inputs[0].get());
+ bool isAnchor() {
+ return getIntrinsicID() == Intrinsic::experimental_convergence_anchor;
+ }
+ bool isEntry() {
+ return getIntrinsicID() == Intrinsic::experimental_convergence_entry;
+ }
+ bool isLoop() {
+ return getIntrinsicID() == Intrinsic::experimental_convergence_loop;
}
};
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index bd804dc112662..25eccd60db3dd 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -73,6 +73,7 @@ struct UnrollLoopOptions {
bool AllowExpensiveTripCount;
bool UnrollRemainder;
bool ForgetAllSCEV;
+ const Instruction *Heart = nullptr;
};
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
@@ -129,6 +130,7 @@ class UnrollCostEstimator {
public:
unsigned NumInlineCandidates;
bool Convergent;
+ bool ConvergentAllowsRuntime;
UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues,
diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp
index 2637e2f97dbb2..42c21f8ff1913 100644
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
@@ -111,11 +112,22 @@ void CodeMetrics::collectEphemeralValues(
completeEphemeralValues(Visited, Worklist, EphValues);
}
+static bool isUsedOutsideOfLoop(const Instruction &I, const Loop &L) {
+ for (const auto *U : I.users()) {
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (!L.contains(I->getParent()))
+ return true;
+ }
+ }
+ return false;
+}
+
/// Fill in the current structure with information gleaned from the specified
/// block.
void CodeMetrics::analyzeBasicBlock(
const BasicBlock *BB, const TargetTransformInfo &TTI,
- const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) {
+ const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO,
+ const Loop *L) {
++NumBlocks;
InstructionCost NumInstsBeforeThisBB = NumInsts;
for (const Instruction &I : *BB) {
@@ -163,20 +175,34 @@ void CodeMetrics::analyzeBasicBlock(
if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
++NumVectorInsts;
- if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
+ if (L && isa<ConvergenceControlInst>(I)) {
+ if (isUsedOutsideOfLoop(I, *L)) {
+ LLVM_DEBUG(dbgs() << I
+ << "\n Cannot duplicate a convergence control token "
+ "used outside the loop.\n");
+ notDuplicatable = true;
+ }
+ } else if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB)) {
+ LLVM_DEBUG(dbgs() << I
+ << "\n Cannot duplicate a token value used outside "
+ "the current block.\n");
notDuplicatable = true;
+ }
- if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
- if (CI->cannotDuplicate())
+ if (const CallBase *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->cannotDuplicate())
notDuplicatable = true;
- if (CI->isConvergent())
+ if (CB->isConvergent()) {
+ LLVM_DEBUG(dbgs() << "Found a convergent operation.\n");
convergent = true;
+ if (!isa<ConvergenceControlInst>(CB) &&
+ !CB->getConvergenceControlToken()) {
+ LLVM_DEBUG(dbgs() << " uncontrolled.\n");
+ convergentUncontrolled = true;
+ }
+ }
}
- if (const InvokeInst *InvI = dyn_cast<InvokeInst>(&I))
- if (InvI->cannotDuplicate())
- notDuplicatable = true;
-
NumInsts += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
}
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 369ab087ffc0f..c34c4974382ea 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -1105,6 +1105,26 @@ int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name,
return getOptionalIntLoopAttribute(TheLoop, Name).value_or(Default);
}
+CallBase *llvm::getLoopConvergenceHeart(const Loop *TheLoop) {
+ BasicBlock *H = TheLoop->getHeader();
+ for (Instruction &II : *H) {
+ if (auto *CB = dyn_cast<CallBase>(&II)) {
+ if (!CB->isConvergent())
+ continue;
+ // This is the heart if it uses a token defined outside the loop. The
+ // verifier has already checked that only the loop intrinsic can use such
+ // a token.
+ if (auto *Token = CB->getConvergenceControlToken()) {
+ auto *TokenDef = cast<Instruction>(Token);
+ if (!TheLoop->contains(TokenDef->getParent()))
+ return CB;
+ }
+ return nullptr;
+ }
+ }
+ return nullptr;
+}
+
bool llvm::isFinite(const Loop *L) {
return L->getHeader()->getParent()->willReturn();
}
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 7b4c54370e48a..5374d469322c9 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -341,6 +341,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return LoopUnrollResult::Unmodified;
}
+ // FIXME: Allow unrolling for convergent operations anchored inside the loop.
if (InnerUCE.Convergent || OuterUCE.Convergent) {
LLVM_DEBUG(
dbgs() << " Not unrolling loop with convergent instructions.\n");
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 10fc9e9303e89..fcf53d9e0ce06 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -684,11 +684,14 @@ UnrollCostEstimator::UnrollCostEstimator(
const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
CodeMetrics Metrics;
for (BasicBlock *BB : L->blocks())
- Metrics.analyzeBasicBlock(BB, TTI, EphValues);
+ Metrics.analyzeBasicBlock(BB, TTI, EphValues, /* PrepareForLTO= */ false,
+ L);
NumInlineCandidates = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
Convergent = Metrics.convergent;
LoopSize = Metrics.NumInsts;
+ ConvergentAllowsRuntime =
+ !Metrics.convergentUncontrolled && !getLoopConvergenceHeart(L);
// Don't allow an estimate of size zero. This would allows unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
@@ -1254,15 +1257,9 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// is unsafe -- it adds a control-flow dependency to the convergent
// operation. Therefore restrict remainder loop (try unrolling without).
//
- // TODO: This is quite conservative. In practice, convergent_op()
- // is likely to be called unconditionally in the loop. In this
- // case, the program would be ill-formed (on most architectures)
- // unless n were the same on all threads in a thread group.
- // Assuming n is the same on all threads, any kind of unrolling is
- // safe. But currently llvm's notion of convergence isn't powerful
- // enough to express this.
- if (UCE.Convergent)
- UP.AllowRemainder = false;
+ // TODO: This is somewhat conservative; we could allow the remainder if the
+ // trip count is uniform.
+ UP.AllowRemainder &= UCE.ConvergentAllowsRuntime;
// Try to find the trip count upper bound if we cannot find the exact trip
// count.
@@ -1282,6 +1279,8 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
if (!UP.Count)
return LoopUnrollResult::Unmodified;
+ UP.Runtime &= UCE.ConvergentAllowsRuntime;
+
if (PP.PeelCount) {
assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
@@ -1324,11 +1323,16 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// Unroll the loop.
Loop *RemainderLoop = nullptr;
+ UnrollLoopOptions ULO;
+ ULO.Count = UP.Count;
+ ULO.Force = UP.Force;
+ ULO.AllowExpensiveTripCount = UP.AllowExpensiveTripCount;
+ ULO.UnrollRemainder = UP.UnrollRemainder;
+ ULO.Runtime = UP.Runtime;
+ ULO.ForgetAllSCEV = ForgetAllSCEV;
+ ULO.Heart = getLoopConvergenceHeart(L);
LoopUnrollResult UnrollResult = UnrollLoop(
- L,
- {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
- UP.UnrollRemainder, ForgetAllSCEV},
- LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
+ L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
if (UnrollResult == LoopUnrollResult::Unmodified)
return LoopUnrollResult::Unmodified;
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 20978cf2e748a..e73ca48824c08 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -415,6 +415,26 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
}
}
+// Loops containing convergent instructions that are uncontrolled or controlled
+// from outside the loop must have a count that divides their TripMultiple.
+LLVM_ATTRIBUTE_USED
+static bool canHaveUnrollRemainder(const Loop *L) {
+ if (getLoopConvergenceHeart(L))
+ return false;
+
+ // Check for uncontrolled convergent operations.
+ for (auto &BB : L->blocks()) {
+ for (auto &I : *BB) {
+ if (isa<ConvergenceControlInst>(I))
+ return true;
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (CB->isConvergent())
+ return CB->getConvergenceControlToken();
+ }
+ }
+ return true;
+}
+
/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
@@ -560,19 +580,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
}
- // Loops containing convergent instructions cannot use runtime unrolling,
- // as the prologue/epilogue may add additional control-dependencies to
- // convergent operations.
- LLVM_DEBUG(
- {
- bool HasConvergent = false;
- for (auto &BB : L->blocks())
- for (auto &I : *BB)
- if (auto *CB = dyn_cast<CallBase>(&I))
- HasConvergent |= CB->isConvergent();
- assert((!HasConvergent || !ULO.Runtime) &&
- "Can't runtime unroll if loop contains a convergent operation.");
- });
+ assert((!ULO.Runtime || canHaveUnrollRemainder(L)) &&
+ "Can't runtime unroll if loop contains a convergent operation.");
bool EpilogProfitability =
UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
@@ -718,7 +727,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (OldLoop)
LoopsToSimplify.insert(NewLoops[OldLoop]);
- if (*BB == Header)
+ if (*BB == Header) {
// Loop over all of the PHI nodes in the block, changing them to use
// the incoming values from the previous block.
for (PHINode *OrigPHI : OrigPHINode) {
@@ -731,6 +740,16 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
NewPHI->eraseFromParent();
}
+ // Eliminate copies of the loop heart intrinsic, if any.
+ if (ULO.Heart) {
+ auto it = VMap.find(ULO.Heart);
+ assert(it != VMap.end());
+ Instruction *heartCopy = cast<Instruction>(it->second);
+ heartCopy->eraseFromParent();
+ VMap.erase(it);
+ }
+ }
+
// Update our running map of newest clones
LastValueMap[*BB] = New;
for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index e1af02829c1da..dd7150bc63ec4 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -1016,12 +1016,17 @@ bool llvm::UnrollRuntimeLoopRemainder(
auto UnrollResult = LoopUnrollResult::Unmodified;
if (remainderLoop && UnrollRemainder) {
LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
- UnrollResult =
- UnrollLoop(remainderLoop,
- {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,
- /*AllowExpensiveTripCount*/ false,
- /*UnrollRemainder*/ false, ForgetAllSCEV},
- LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
+ UnrollLoopOptions ULO;
+ ULO.Count = Count - 1;
+ ULO.Force = false;
+ ULO.Runtime = false;
+ ULO.AllowExpensiveTripCount = false;
+ ULO.UnrollRemainder = false;
+ ULO.ForgetAllSCEV = ForgetAllSCEV;
+ assert(!getLoopConvergenceHeart(L) &&
+ "A loop with a convergence heart does not allow runtime unrolling.");
+ UnrollResult = UnrollLoop(remainderLoop, ULO, LI, SE, DT, AC, TTI,
+ /*ORE*/ nullptr, PreserveLCSSA);
}
if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
diff --git a/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll b/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll
new file mode 100644
index 0000000000000..7fd4eb18f16eb
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll
@@ -0,0 +1,562 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s
+
+declare void @f() convergent
+declare void @g()
+
+; Although this loop contains a convergent instruction, it should be
+; fully unrolled.
+define i32 @full_unroll() {
+; CHECK-LABEL: @full_unroll(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: br label [[L3:%.*]]
+; CHECK: l3:
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: br label [[A:%.*]]
+; CHECK: a:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_1:%.*]]
+; CHECK: a.1:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_2:%.*]]
+; CHECK: a.2:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %anchor = call token @llvm.experimental.convergence.anchor()
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %a ]
+ %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 3
+ br label %a
+
+a:
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction, but it should be partially
+; unrolled. The unroll count is the largest power of 2 that divides the
+; multiple -- 4, in this case.
+define i32 @runtime_unroll(i32 %n) {
+; CHECK-LABEL: @runtime_unroll(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ANCHOR:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: [[LOOP_CTL:%.*]] = mul nsw i32 [[N:%.*]], 12
+; CHECK-NEXT: br label [[L3:%.*]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_3:%.*]], [[A_3:%.*]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[ANCHOR]]) ]
+; CHECK-NEXT: br label [[A:%.*]]
+; CHECK: a:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_1:%.*]]
+; CHECK: a.1:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_2:%.*]]
+; CHECK: a.2:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: br label [[A_3]]
+; CHECK: a.3:
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; ...
[truncated]
```cpp
/// True if this function contains a call to a convergent function.
bool convergent = false;

/// True if the code contains an uncontrolled convergent operation.
bool convergentUncontrolled = false;
```
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe combine these into a ConvergenceType field?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
llvm/lib/Analysis/CodeMetrics.cpp
Outdated
```cpp
@@ -111,11 +112,22 @@ void CodeMetrics::collectEphemeralValues(
  completeEphemeralValues(Visited, Worklist, EphValues);
}

static bool isUsedOutsideOfLoop(const Instruction &I, const Loop &L) {
  for (const auto *U : I.users()) {
    if (auto *I = dyn_cast<Instruction>(U)) {
```
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All users of Instructions are Instructions; just use `cast<>`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
```cpp
@@ -731,6 +740,16 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
  NewPHI->eraseFromParent();
}

// Eliminate copies of the loop heart intrinsic, if any.
```
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I understand the rules here correctly, this is only necessary if you're not completely unrolling the loop?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Correct. If the loop is completely unrolled, then the loop intrinsic is redundant. Some other pass should eventually clean that up. But when a loop is partially unrolled, those copies of the heart violate the static rules, and all of them should be replaced by the instance from the header.
llvm/lib/Analysis/CodeMetrics.cpp
Outdated
```cpp
LLVM_DEBUG(dbgs() << I
                  << "\n Cannot duplicate a convergence control token "
                     "used outside the loop.\n");
notDuplicatable = true;
```
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems like it's overloading the meaning of "notDuplicatable"; maybe consider splitting the convergence stuff into a separate bit?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The meaning of "notDuplicatable" is not overloaded; it's exactly what the name says, right? It's just that now a block may be "not duplicatable" for two different reasons. But we don't really have any clients that need to distinguish why something is notDuplicatable. We could revisit this variable when such a distinction becomes necessary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FunctionSpecialization is currently using CodeMetrics; does it really not care about the distinction?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I still think we should get rid of noduplicate entirely. It was a failed attempt to avoid inventing convergence.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed. `canUnroll()` now separately considers any discovered convergence instead of mixing it with `notDuplicatable`.
Latest version looks fine to me, but I'll let @arsenm give final approval.
- There is no restriction on a loop with controlled convergent operations when the relevant tokens are defined and used within the loop.
- When a token defined outside a loop is used inside (also called a loop convergence heart), unrolling is allowed only in the absence of remainder or runtime checks.
- When a token defined inside a loop is used outside, such a loop is said to be "extended". This loop can only be unrolled by also duplicating the extended part lying outside the loop. Such unrolling is disabled for now.
- Clean up loop hearts: When unrolling a loop with a heart, duplicating the heart will introduce multiple static uses of a convergence control token in a cycle that does not contain its definition. This violates the static rules for tokens, and needs to be cleaned up into a single occurrence of the intrinsic.
- Spell out the initializer for UnrollLoopOptions to improve readability.

Original implementation [D85605] by Nicolai Haehnle <[email protected]>.
0402406
to
7f17a75
Compare
There is no restriction on a loop with controlled convergent operations when
the relevant tokens are defined and used within the loop.
When a token defined outside a loop is used inside (also called a loop
convergence heart), unrolling is allowed only in the absence of remainder or
runtime checks.
When a token defined inside a loop is used outside, such a loop is said to be
"extended". This loop can only be unrolled by also duplicating the extended part
lying outside the loop. Such unrolling is disabled for now.
Clean up loop hearts: When unrolling a loop with a heart, duplicating the
heart will introduce multiple static uses of a convergence control token in a
cycle that does not contain its definition. This violates the static rules for
tokens, and needs to be cleaned up into a single occurrence of the intrinsic.
Spell out the initializer for UnrollLoopOptions to improve readability.
Original implementation [D85605] by Nicolai Haehnle [email protected].