@@ -7467,23 +7467,31 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
7467
7467
}
7468
7468
7469
7469
// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7470
- // create a merge phi node for it.
7471
- static void createAndCollectMergePhiForReduction (
7472
- VPInstruction *RedResult,
7473
- VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock ,
7474
- bool VectorizingEpilogue) {
7470
+ // look up its merge phi node and set its incoming values from the main vector
7471
+ // loop.
7472
+ static void updateAndCollectMergePhiForReductionForEpilogueVectorization (
7473
+ VPInstruction *RedResult, VPTransformState &State, Loop *OrigLoop,
7474
+ BasicBlock *LoopMiddleBlock, bool VectorizingEpilogue) {
7475
7475
if (!RedResult ||
7476
7476
RedResult->getOpcode () != VPInstruction::ComputeReductionResult)
7477
7477
return ;
7478
7478
7479
+ using namespace VPlanPatternMatch ;
7480
+ VPValue *ResumePhiVPV =
7481
+ cast<VPInstruction>(*find_if (RedResult->users (), [](VPUser *U) {
7482
+ return match (U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue (),
7483
+ m_VPValue ()));
7484
+ }));
7485
+ auto *BCBlockPhi = cast<PHINode>(State.get (ResumePhiVPV, true ));
7479
7486
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand (0 ));
7480
7487
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
7488
+ if (!VectorizingEpilogue)
7489
+ return ;
7481
7490
7482
- Value *FinalValue = State.get (RedResult, VPLane (VPLane::getFirstLane ()));
7483
7491
auto *ResumePhi =
7484
7492
dyn_cast<PHINode>(PhiR->getStartValue ()->getUnderlyingValue ());
7485
- if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind (
7486
- RdxDesc.getRecurrenceKind ())) {
7493
+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind (
7494
+ RdxDesc.getRecurrenceKind ())) {
7487
7495
auto *Cmp = cast<ICmpInst>(PhiR->getStartValue ()->getUnderlyingValue ());
7488
7496
assert (Cmp->getPredicate () == CmpInst::ICMP_NE);
7489
7497
assert (Cmp->getOperand (1 ) == RdxDesc.getRecurrenceStartValue ());
@@ -7493,40 +7501,15 @@ static void createAndCollectMergePhiForReduction(
7493
7501
" when vectorizing the epilogue loop, we need a resume phi from main "
7494
7502
" vector loop" );
7495
7503
7496
- // TODO: bc.merge.rdx should not be created here, instead it should be
7497
- // modeled in VPlan.
7498
7504
BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader ();
7499
- // Create a phi node that merges control-flow from the backedge-taken check
7500
- // block and the middle block.
7501
- auto *BCBlockPhi =
7502
- PHINode::Create (FinalValue->getType (), 2 , " bc.merge.rdx" ,
7503
- LoopScalarPreHeader->getTerminator ()->getIterator ());
7504
-
7505
7505
// If we are fixing reductions in the epilogue loop then we should already
7506
7506
// have created a bc.merge.rdx Phi after the main vector body. Ensure that
7507
7507
// we carry over the incoming values correctly.
7508
7508
for (auto *Incoming : predecessors (LoopScalarPreHeader)) {
7509
- if (Incoming == LoopMiddleBlock)
7510
- BCBlockPhi->addIncoming (FinalValue, Incoming);
7511
- else if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7512
- BCBlockPhi->addIncoming (ResumePhi->getIncomingValueForBlock (Incoming),
7513
- Incoming);
7514
- else
7515
- BCBlockPhi->addIncoming (RdxDesc.getRecurrenceStartValue (), Incoming);
7509
+ if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7510
+ BCBlockPhi->setIncomingValueForBlock (
7511
+ Incoming, ResumePhi->getIncomingValueForBlock (Incoming));
7516
7512
}
7517
-
7518
- auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue ());
7519
- // TODO: This fixup should instead be modeled in VPlan.
7520
- // Fix the scalar loop reduction variable with the incoming reduction sum
7521
- // from the vector body and from the backedge value.
7522
- int IncomingEdgeBlockIdx =
7523
- OrigPhi->getBasicBlockIndex (OrigLoop->getLoopLatch ());
7524
- assert (IncomingEdgeBlockIdx >= 0 && " Invalid block index" );
7525
- // Pick the other block.
7526
- int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1 );
7527
- OrigPhi->setIncomingValue (SelfEdgeBlockIdx, BCBlockPhi);
7528
- Instruction *LoopExitInst = RdxDesc.getLoopExitInstr ();
7529
- OrigPhi->setIncomingValue (IncomingEdgeBlockIdx, LoopExitInst);
7530
7513
}
7531
7514
7532
7515
DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan (
@@ -7617,11 +7600,12 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7617
7600
// 2.5 Collect reduction resume values.
7618
7601
auto *ExitVPBB =
7619
7602
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7620
- for (VPRecipeBase &R : *ExitVPBB) {
7621
- createAndCollectMergePhiForReduction (
7622
- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7623
- State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7624
- }
7603
+ if (IsEpilogueVectorization)
7604
+ for (VPRecipeBase &R : *ExitVPBB) {
7605
+ updateAndCollectMergePhiForReductionForEpilogueVectorization (
7606
+ dyn_cast<VPInstruction>(&R), State, OrigLoop,
7607
+ State.CFG .VPBB2IRBB [ExitVPBB], ExpandedSCEVs);
7608
+ }
7625
7609
7626
7610
// 2.6. Maintain Loop Hints
7627
7611
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -9411,6 +9395,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9411
9395
});
9412
9396
FinalReductionResult->insertBefore (*MiddleVPBB, IP);
9413
9397
9398
+ VPBasicBlock *ScalarPHVPBB = nullptr ;
9399
+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
9400
+ // Order is strict: first is the exit block, second is the scalar
9401
+ // preheader.
9402
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
9403
+ } else {
9404
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
9405
+ }
9406
+
9407
+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
9408
+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
9409
+ VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue ()},
9410
+ {}, " bc.merge.rdx" );
9411
+ auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr ());
9412
+ Plan->addLiveOut (RedPhi, ResumePhiRecipe);
9413
+
9414
9414
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9415
9415
// with a boolean reduction phi node to check if the condition is true in
9416
9416
// any iteration. The final value is selected by the final
0 commit comments