Skip to content

Commit d3614bc

Browse files
committed
[VPlan] Use ResumePhi to create reduction resume phis.
Use VPInstruction::ResumePhi to create phi nodes for reduction resume values. This allows simplifying createAndCollectMergePhiForReduction so that it only collects reduction resume phis when vectorizing epilogue loops and adds the extra incoming edges from the main vector loop.
1 parent 2190ffa commit d3614bc

File tree

4 files changed

+61
-41
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 41 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -7467,23 +7467,31 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
74677467
}
74687468

74697469
// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7470-
// create a merge phi node for it.
7471-
static void createAndCollectMergePhiForReduction(
7472-
VPInstruction *RedResult,
7473-
VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
7474-
bool VectorizingEpilogue) {
7470+
// create a merge phi node for it and add incoming values from the main vector
7471+
// loop.
7472+
static void updateAndCollectMergePhiForReductionForEpilogueVectorization(
7473+
VPInstruction *RedResult, VPTransformState &State, Loop *OrigLoop,
7474+
BasicBlock *LoopMiddleBlock, bool VectorizingEpilogue) {
74757475
if (!RedResult ||
74767476
RedResult->getOpcode() != VPInstruction::ComputeReductionResult)
74777477
return;
74787478

7479+
using namespace VPlanPatternMatch;
7480+
VPValue *ResumePhiVPV =
7481+
cast<VPInstruction>(*find_if(RedResult->users(), [](VPUser *U) {
7482+
return match(U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(),
7483+
m_VPValue()));
7484+
}));
7485+
auto *BCBlockPhi = cast<PHINode>(State.get(ResumePhiVPV, true));
74797486
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
74807487
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
7488+
if (!VectorizingEpilogue)
7489+
return;
74817490

7482-
Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane()));
74837491
auto *ResumePhi =
74847492
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
7485-
if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
7486-
RdxDesc.getRecurrenceKind())) {
7493+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
7494+
RdxDesc.getRecurrenceKind())) {
74877495
auto *Cmp = cast<ICmpInst>(PhiR->getStartValue()->getUnderlyingValue());
74887496
assert(Cmp->getPredicate() == CmpInst::ICMP_NE);
74897497
assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue());
@@ -7493,40 +7501,15 @@ static void createAndCollectMergePhiForReduction(
74937501
"when vectorizing the epilogue loop, we need a resume phi from main "
74947502
"vector loop");
74957503

7496-
// TODO: bc.merge.rdx should not be created here, instead it should be
7497-
// modeled in VPlan.
74987504
BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader();
7499-
// Create a phi node that merges control-flow from the backedge-taken check
7500-
// block and the middle block.
7501-
auto *BCBlockPhi =
7502-
PHINode::Create(FinalValue->getType(), 2, "bc.merge.rdx",
7503-
LoopScalarPreHeader->getTerminator()->getIterator());
7504-
75057505
// If we are fixing reductions in the epilogue loop then we should already
75067506
// have created a bc.merge.rdx Phi after the main vector body. Ensure that
75077507
// we carry over the incoming values correctly.
75087508
for (auto *Incoming : predecessors(LoopScalarPreHeader)) {
7509-
if (Incoming == LoopMiddleBlock)
7510-
BCBlockPhi->addIncoming(FinalValue, Incoming);
7511-
else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
7512-
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
7513-
Incoming);
7514-
else
7515-
BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
7509+
if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
7510+
BCBlockPhi->setIncomingValueForBlock(
7511+
Incoming, ResumePhi->getIncomingValueForBlock(Incoming));
75167512
}
7517-
7518-
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
7519-
// TODO: This fixup should instead be modeled in VPlan.
7520-
// Fix the scalar loop reduction variable with the incoming reduction sum
7521-
// from the vector body and from the backedge value.
7522-
int IncomingEdgeBlockIdx =
7523-
OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch());
7524-
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
7525-
// Pick the other block.
7526-
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
7527-
OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
7528-
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
7529-
OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
75307513
}
75317514

75327515
DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
@@ -7617,11 +7600,12 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76177600
// 2.5 Collect reduction resume values.
76187601
auto *ExitVPBB =
76197602
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7620-
for (VPRecipeBase &R : *ExitVPBB) {
7621-
createAndCollectMergePhiForReduction(
7622-
dyn_cast<VPInstruction>(&R), State, OrigLoop,
7623-
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7624-
}
7603+
if (IsEpilogueVectorization)
7604+
for (VPRecipeBase &R : *ExitVPBB) {
7605+
updateAndCollectMergePhiForReductionForEpilogueVectorization(
7606+
dyn_cast<VPInstruction>(&R), State, OrigLoop,
7607+
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7608+
}
76257609

76267610
// 2.6. Maintain Loop Hints
76277611
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -9411,6 +9395,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
94119395
});
94129396
FinalReductionResult->insertBefore(*MiddleVPBB, IP);
94139397

9398+
VPBasicBlock *ScalarPHVPBB = nullptr;
9399+
if (MiddleVPBB->getNumSuccessors() == 2) {
9400+
// Order is strict: first is the exit block, second is the scalar
9401+
// preheader.
9402+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
9403+
} else {
9404+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
9405+
}
9406+
9407+
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
9408+
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
9409+
VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()},
9410+
{}, "bc.merge.rdx");
9411+
auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
9412+
Plan->addLiveOut(RedPhi, ResumePhiRecipe);
9413+
94149414
// Adjust AnyOf reductions; replace the reduction phi for the selected value
94159415
// with a boolean reduction phi node to check if the condition is true in
94169416
// any iteration. The final value is selected by the final

llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
6565
; IF-EVL-INLOOP-NEXT: No successors
6666
; IF-EVL-INLOOP-EMPTY:
6767
; IF-EVL-INLOOP-NEXT: scalar.ph:
68+
; IF-EVL-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
6869
; IF-EVL-INLOOP-NEXT: No successors
70+
; IF-EVL-INLOOP-EMPTY:
71+
; IF-EVL-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
6972
; IF-EVL-INLOOP-NEXT: }
7073
;
7174

@@ -104,7 +107,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
104107
; NO-VP-OUTLOOP-NEXT: No successors
105108
; NO-VP-OUTLOOP-EMPTY:
106109
; NO-VP-OUTLOOP-NEXT: scalar.ph:
110+
; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
107111
; NO-VP-OUTLOOP-NEXT: No successors
112+
; NO-VP-OUTLOOP-EMPTY:
113+
; NO-VP-OUTLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
108114
; NO-VP-OUTLOOP-NEXT: }
109115
;
110116

@@ -143,7 +149,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
143149
; NO-VP-INLOOP-NEXT: No successors
144150
; NO-VP-INLOOP-EMPTY:
145151
; NO-VP-INLOOP-NEXT: scalar.ph:
152+
; NO-VP-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
146153
; NO-VP-INLOOP-NEXT: No successors
154+
; NO-VP-INLOOP-EMPTY:
155+
; NO-VP-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
147156
; NO-VP-INLOOP-NEXT: }
148157
;
149158
entry:

llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -232,9 +232,11 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
232232
; CHECK-EMPTY:
233233
; CHECK-NEXT: scalar.ph
234234
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
235+
; CHECK-NEXT: EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234>
235236
; CHECK-NEXT: No successors
236237
; CHECK-EMPTY:
237238
; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
239+
; CHECK-NEXT: Live-out i32 %and.red = vp<[[RESUME_RED]]>
238240
; CHECK-NEXT: }
239241
;
240242
entry:

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,10 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
165165
; CHECK-NEXT: No successors
166166
; CHECK-EMPTY:
167167
; CHECK-NEXT: scalar.ph
168+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
168169
; CHECK-NEXT: No successors
170+
; CHECK-EMPTY:
171+
; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
169172
; CHECK-NEXT: }
170173
;
171174
entry:
@@ -221,7 +224,10 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
221224
; CHECK-NEXT: No successors
222225
; CHECK-EMPTY:
223226
; CHECK-NEXT: scalar.ph
227+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
224228
; CHECK-NEXT: No successors
229+
; CHECK-EMPTY:
230+
; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
225231
; CHECK-NEXT: }
226232
;
227233
entry:
@@ -447,7 +453,10 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
447453
; CHECK-NEXT: No successors
448454
; CHECK-EMPTY:
449455
; CHECK-NEXT: scalar.ph
456+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
450457
; CHECK-NEXT: No successors
458+
; CHECK-EMPTY:
459+
; CHECK-NEXT: Live-out float %sum.07 = vp<[[RED_RESUME]]>
451460
; CHECK-NEXT:}
452461

453462
entry:

0 commit comments

Comments
 (0)