Skip to content

Commit 0d0abb3

Browse files
authored
[VPlan] Use ResumePhi to create reduction resume phis. (#110004)
Use VPInstruction::ResumePhi to create phi nodes for reduction resume values in the scalar preheader, similar to how ResumePhis are used for first-order recurrence resume values after 9a5a873. This allows simplifying createAndCollectMergePhiForReduction so that it only collects reduction resume phis when vectorizing epilogue loops and adds the extra incoming edges from the main vector loop. Updating phis for the epilogue vector loops requires special attention, because additional incoming values from the bypass blocks need to be added. PR: #110004
1 parent 66bbbf2 commit 0d0abb3

File tree

4 files changed

+88
-62
lines changed

4 files changed

+88
-62
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 68 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -7562,67 +7562,62 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
75627562
}
75637563
}
75647564

7565-
// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7566-
// create a merge phi node for it.
7567-
static void createAndCollectMergePhiForReduction(
7568-
VPInstruction *RedResult,
7569-
VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
7570-
bool VectorizingEpilogue) {
7571-
if (!RedResult ||
7572-
RedResult->getOpcode() != VPInstruction::ComputeReductionResult)
7565+
// If \p R is a ComputeReductionResult when vectorizing the epilog loop,
7566+
// fix the reduction's scalar PHI node by adding the incoming value from the
7567+
// main vector loop.
7568+
static void fixReductionScalarResumeWhenVectorizingEpilog(
7569+
VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock) {
7570+
auto *EpiRedResult = dyn_cast<VPInstruction>(R);
7571+
if (!EpiRedResult ||
7572+
EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult)
75737573
return;
75747574

7575-
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
7576-
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
7577-
7578-
Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane()));
7579-
auto *ResumePhi =
7580-
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
7581-
if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
7582-
RdxDesc.getRecurrenceKind())) {
7583-
auto *Cmp = cast<ICmpInst>(PhiR->getStartValue()->getUnderlyingValue());
7584-
assert(Cmp->getPredicate() == CmpInst::ICMP_NE);
7585-
assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue());
7586-
ResumePhi = cast<PHINode>(Cmp->getOperand(0));
7587-
}
7588-
assert((!VectorizingEpilogue || ResumePhi) &&
7589-
"when vectorizing the epilogue loop, we need a resume phi from main "
7590-
"vector loop");
7591-
7592-
// TODO: bc.merge.rdx should not be created here, instead it should be
7593-
// modeled in VPlan.
7594-
BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader();
7595-
// Create a phi node that merges control-flow from the backedge-taken check
7596-
// block and the middle block.
7597-
auto *BCBlockPhi =
7598-
PHINode::Create(FinalValue->getType(), 2, "bc.merge.rdx",
7599-
LoopScalarPreHeader->getTerminator()->getIterator());
7600-
7601-
// If we are fixing reductions in the epilogue loop then we should already
7602-
// have created a bc.merge.rdx Phi after the main vector body. Ensure that
7603-
// we carry over the incoming values correctly.
7575+
auto *EpiRedHeaderPhi =
7576+
cast<VPReductionPHIRecipe>(EpiRedResult->getOperand(0));
7577+
const RecurrenceDescriptor &RdxDesc =
7578+
EpiRedHeaderPhi->getRecurrenceDescriptor();
7579+
Value *MainResumeValue =
7580+
EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
7581+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
7582+
RdxDesc.getRecurrenceKind())) {
7583+
auto *Cmp = cast<ICmpInst>(MainResumeValue);
7584+
assert(Cmp->getPredicate() == CmpInst::ICMP_NE &&
7585+
"AnyOf expected to start with ICMP_NE");
7586+
assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue() &&
7587+
"AnyOf expected to start by comparing main resume value to original "
7588+
"start value");
7589+
MainResumeValue = Cmp->getOperand(0);
7590+
}
7591+
PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);
7592+
7593+
// When fixing reductions in the epilogue loop we should already have
7594+
// created a bc.merge.rdx Phi after the main vector body. Ensure that we carry
7595+
// over the incoming values correctly.
7596+
using namespace VPlanPatternMatch;
7597+
auto IsResumePhi = [](VPUser *U) {
7598+
return match(
7599+
U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(), m_VPValue()));
7600+
};
7601+
assert(count_if(EpiRedResult->users(), IsResumePhi) == 1 &&
7602+
"ResumePhi must have a single user");
7603+
auto *EpiResumePhiVPI =
7604+
cast<VPInstruction>(*find_if(EpiRedResult->users(), IsResumePhi));
7605+
auto *EpiResumePhi = cast<PHINode>(State.get(EpiResumePhiVPI, true));
7606+
BasicBlock *LoopScalarPreHeader = EpiResumePhi->getParent();
7607+
bool Updated = false;
76047608
for (auto *Incoming : predecessors(LoopScalarPreHeader)) {
7605-
if (Incoming == LoopMiddleBlock)
7606-
BCBlockPhi->addIncoming(FinalValue, Incoming);
7607-
else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
7608-
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
7609-
Incoming);
7610-
else
7611-
BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
7609+
if (is_contained(MainResumePhi->blocks(), Incoming)) {
7610+
assert(EpiResumePhi->getIncomingValueForBlock(Incoming) ==
7611+
RdxDesc.getRecurrenceStartValue() &&
7612+
"Trying to reset unexpected value");
7613+
assert(!Updated && "Should update at most 1 incoming value");
7614+
EpiResumePhi->setIncomingValueForBlock(
7615+
Incoming, MainResumePhi->getIncomingValueForBlock(Incoming));
7616+
Updated = true;
7617+
}
76127618
}
7613-
7614-
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
7615-
// TODO: This fixup should instead be modeled in VPlan.
7616-
// Fix the scalar loop reduction variable with the incoming reduction sum
7617-
// from the vector body and from the backedge value.
7618-
int IncomingEdgeBlockIdx =
7619-
OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch());
7620-
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
7621-
// Pick the other block.
7622-
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
7623-
OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
7624-
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
7625-
OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
7619+
assert(Updated && "Must update EpiResumePhi.");
7620+
(void)Updated;
76267621
}
76277622

76287623
DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
@@ -7713,11 +7708,11 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77137708
// 2.5 Collect reduction resume values.
77147709
auto *ExitVPBB =
77157710
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7716-
for (VPRecipeBase &R : *ExitVPBB) {
7717-
createAndCollectMergePhiForReduction(
7718-
dyn_cast<VPInstruction>(&R), State, OrigLoop,
7719-
State.CFG.VPBB2IRBB[ExitVPBB], VectorizingEpilogue);
7720-
}
7711+
if (VectorizingEpilogue)
7712+
for (VPRecipeBase &R : *ExitVPBB) {
7713+
fixReductionScalarResumeWhenVectorizingEpilog(
7714+
&R, State, State.CFG.VPBB2IRBB[ExitVPBB]);
7715+
}
77217716

77227717
// 2.6. Maintain Loop Hints
77237718
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -9518,6 +9513,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
95189513
});
95199514
FinalReductionResult->insertBefore(*MiddleVPBB, IP);
95209515

9516+
// Order is strict: if there are multiple successors, the first is the exit
9517+
// block, second is the scalar preheader.
9518+
VPBasicBlock *ScalarPHVPBB =
9519+
cast<VPBasicBlock>(MiddleVPBB->getSuccessors().back());
9520+
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
9521+
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
9522+
VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()},
9523+
{}, "bc.merge.rdx");
9524+
auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
9525+
Plan->addLiveOut(RedPhi, ResumePhiRecipe);
9526+
95219527
// Adjust AnyOf reductions; replace the reduction phi for the selected value
95229528
// with a boolean reduction phi node to check if the condition is true in
95239529
// any iteration. The final value is selected by the final

llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
6565
; IF-EVL-INLOOP-NEXT: No successors
6666
; IF-EVL-INLOOP-EMPTY:
6767
; IF-EVL-INLOOP-NEXT: scalar.ph:
68+
; IF-EVL-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
6869
; IF-EVL-INLOOP-NEXT: No successors
70+
; IF-EVL-INLOOP-EMPTY:
71+
; IF-EVL-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
6972
; IF-EVL-INLOOP-NEXT: }
7073
;
7174

@@ -104,7 +107,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
104107
; NO-VP-OUTLOOP-NEXT: No successors
105108
; NO-VP-OUTLOOP-EMPTY:
106109
; NO-VP-OUTLOOP-NEXT: scalar.ph:
110+
; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
107111
; NO-VP-OUTLOOP-NEXT: No successors
112+
; NO-VP-OUTLOOP-EMPTY:
113+
; NO-VP-OUTLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
108114
; NO-VP-OUTLOOP-NEXT: }
109115
;
110116

@@ -143,7 +149,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
143149
; NO-VP-INLOOP-NEXT: No successors
144150
; NO-VP-INLOOP-EMPTY:
145151
; NO-VP-INLOOP-NEXT: scalar.ph:
152+
; NO-VP-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
146153
; NO-VP-INLOOP-NEXT: No successors
154+
; NO-VP-INLOOP-EMPTY:
155+
; NO-VP-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
147156
; NO-VP-INLOOP-NEXT: }
148157
;
149158
entry:

llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,11 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
232232
; CHECK-EMPTY:
233233
; CHECK-NEXT: scalar.ph
234234
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
235+
; CHECK-NEXT: EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234>
235236
; CHECK-NEXT: No successors
236237
; CHECK-EMPTY:
237238
; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
239+
; CHECK-NEXT: Live-out i32 %and.red = vp<[[RESUME_RED]]>
238240
; CHECK-NEXT: }
239241
;
240242
entry:

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,10 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
165165
; CHECK-NEXT: No successors
166166
; CHECK-EMPTY:
167167
; CHECK-NEXT: scalar.ph
168+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
168169
; CHECK-NEXT: No successors
170+
; CHECK-EMPTY:
171+
; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
169172
; CHECK-NEXT: }
170173
;
171174
entry:
@@ -221,7 +224,10 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
221224
; CHECK-NEXT: No successors
222225
; CHECK-EMPTY:
223226
; CHECK-NEXT: scalar.ph
227+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
224228
; CHECK-NEXT: No successors
229+
; CHECK-EMPTY:
230+
; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
225231
; CHECK-NEXT: }
226232
;
227233
entry:
@@ -447,7 +453,10 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
447453
; CHECK-NEXT: No successors
448454
; CHECK-EMPTY:
449455
; CHECK-NEXT: scalar.ph
456+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
450457
; CHECK-NEXT: No successors
458+
; CHECK-EMPTY:
459+
; CHECK-NEXT: Live-out float %sum.07 = vp<[[RED_RESUME]]>
451460
; CHECK-NEXT:}
452461

453462
entry:

0 commit comments

Comments
 (0)