@@ -7562,67 +7562,62 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
7562
7562
}
7563
7563
}
7564
7564
7565
- // Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7566
- // create a merge phi node for it.
7567
- static void createAndCollectMergePhiForReduction (
7568
- VPInstruction *RedResult,
7569
- VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
7570
- bool VectorizingEpilogue) {
7571
- if (!RedResult ||
7572
- RedResult ->getOpcode () != VPInstruction::ComputeReductionResult)
7565
+ // If \p R is a ComputeReductionResult VPInstruction (any other recipe is ignored),
7566
+ // fix the reduction's scalar resume PHI by replacing one of its incoming values with the matching incoming value of the main vector loop's resume phi.
7567
+ // Only called when vectorizing the epilog loop. NOTE(review): \p LoopMiddleBlock is not referenced by the new body below - confirm the parameter is still needed.
7568
+ static void fixReductionScalarResumeWhenVectorizingEpilog (
7569
+ VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock) {
7570
+ auto *EpiRedResult = dyn_cast<VPInstruction>(R); // Null when R is not a VPInstruction.
7571
+ if (!EpiRedResult ||
7572
+ EpiRedResult ->getOpcode () != VPInstruction::ComputeReductionResult)
7573
7573
return ;
7574
7574
7575
- auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand (0 ));
7576
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
7577
-
7578
- Value *FinalValue = State.get (RedResult, VPLane (VPLane::getFirstLane ()));
7579
- auto *ResumePhi =
7580
- dyn_cast<PHINode>(PhiR->getStartValue ()->getUnderlyingValue ());
7581
- if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind (
7582
- RdxDesc.getRecurrenceKind ())) {
7583
- auto *Cmp = cast<ICmpInst>(PhiR->getStartValue ()->getUnderlyingValue ());
7584
- assert (Cmp->getPredicate () == CmpInst::ICMP_NE);
7585
- assert (Cmp->getOperand (1 ) == RdxDesc.getRecurrenceStartValue ());
7586
- ResumePhi = cast<PHINode>(Cmp->getOperand (0 ));
7587
- }
7588
- assert ((!VectorizingEpilogue || ResumePhi) &&
7589
- " when vectorizing the epilogue loop, we need a resume phi from main "
7590
- " vector loop" );
7591
-
7592
- // TODO: bc.merge.rdx should not be created here, instead it should be
7593
- // modeled in VPlan.
7594
- BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader ();
7595
- // Create a phi node that merges control-flow from the backedge-taken check
7596
- // block and the middle block.
7597
- auto *BCBlockPhi =
7598
- PHINode::Create (FinalValue->getType (), 2 , " bc.merge.rdx" ,
7599
- LoopScalarPreHeader->getTerminator ()->getIterator ());
7600
-
7601
- // If we are fixing reductions in the epilogue loop then we should already
7602
- // have created a bc.merge.rdx Phi after the main vector body. Ensure that
7603
- // we carry over the incoming values correctly.
7575
+ auto *EpiRedHeaderPhi =
7576
+ cast<VPReductionPHIRecipe>(EpiRedResult->getOperand (0 )); // Operand 0 must be the reduction header phi recipe.
7577
+ const RecurrenceDescriptor &RdxDesc =
7578
+ EpiRedHeaderPhi->getRecurrenceDescriptor ();
7579
+ Value *MainResumeValue =
7580
+ EpiRedHeaderPhi->getStartValue ()->getUnderlyingValue (); // IR value underlying the epilogue reduction's start value.
7581
+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind (
7582
+ RdxDesc.getRecurrenceKind ())) { // For AnyOf the start value is a compare of the main resume value against the original start value; look through it.
7583
+ auto *Cmp = cast<ICmpInst>(MainResumeValue);
7584
+ assert (Cmp->getPredicate () == CmpInst::ICMP_NE &&
7585
+ " AnyOf expected to start with ICMP_NE" );
7586
+ assert (Cmp->getOperand (1 ) == RdxDesc.getRecurrenceStartValue () &&
7587
+ " AnyOf expected to start by comparing main resume value to original "
7588
+ " start value" );
7589
+ MainResumeValue = Cmp->getOperand (0 );
7590
+ }
7591
+ PHINode *MainResumePhi = cast<PHINode>(MainResumeValue); // cast<> requires the main resume value to be a PHI node.
7592
+
7593
+ // When fixing reductions in the epilogue loop we should already have
7594
+ // created a bc.merge.rdx Phi after the main vector body. Ensure that we carry
7595
+ // over the incoming values correctly.
7596
+ using namespace VPlanPatternMatch ;
7597
+ auto IsResumePhi = [](VPUser *U) { // True for a two-operand ResumePhi VPInstruction.
7598
+ return match (
7599
+ U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue (), m_VPValue ()));
7600
+ };
7601
+ assert (count_if (EpiRedResult->users (), IsResumePhi) == 1 &&
7602
+ " ResumePhi must have a single user" ); // NOTE(review): the condition checks that EpiRedResult has exactly one ResumePhi user; the message reads the other way round.
7603
+ auto *EpiResumePhiVPI =
7604
+ cast<VPInstruction>(*find_if (EpiRedResult->users (), IsResumePhi));
7605
+ auto *EpiResumePhi = cast<PHINode>(State.get (EpiResumePhiVPI, true )); // IR phi generated for the ResumePhi recipe; 'true' presumably requests the scalar value - TODO confirm.
7606
+ BasicBlock *LoopScalarPreHeader = EpiResumePhi->getParent (); // Block that holds the epilogue's resume phi.
7607
+ bool Updated = false ; // Set once an incoming value has been rewritten; exactly one rewrite is expected.
7604
7608
for (auto *Incoming : predecessors (LoopScalarPreHeader)) {
7605
- if (Incoming == LoopMiddleBlock)
7606
- BCBlockPhi->addIncoming (FinalValue, Incoming);
7607
- else if (ResumePhi && is_contained (ResumePhi->blocks (), Incoming))
7608
- BCBlockPhi->addIncoming (ResumePhi->getIncomingValueForBlock (Incoming),
7609
- Incoming);
7610
- else
7611
- BCBlockPhi->addIncoming (RdxDesc.getRecurrenceStartValue (), Incoming);
7609
+ if (is_contained (MainResumePhi->blocks (), Incoming)) { // Predecessor that is also an incoming block of the main loop's resume phi.
7610
+ assert (EpiResumePhi->getIncomingValueForBlock (Incoming) ==
7611
+ RdxDesc.getRecurrenceStartValue () &&
7612
+ " Trying to reset unexpected value" );
7613
+ assert (!Updated && " Should update at most 1 incoming value" );
7614
+ EpiResumePhi->setIncomingValueForBlock (
7615
+ Incoming, MainResumePhi->getIncomingValueForBlock (Incoming)); // Replace the original start value with the value resumed from the main vector loop.
7616
+ Updated = true ;
7617
+ }
7612
7618
}
7613
-
7614
- auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue ());
7615
- // TODO: This fixup should instead be modeled in VPlan.
7616
- // Fix the scalar loop reduction variable with the incoming reduction sum
7617
- // from the vector body and from the backedge value.
7618
- int IncomingEdgeBlockIdx =
7619
- OrigPhi->getBasicBlockIndex (OrigLoop->getLoopLatch ());
7620
- assert (IncomingEdgeBlockIdx >= 0 && " Invalid block index" );
7621
- // Pick the other block.
7622
- int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1 );
7623
- OrigPhi->setIncomingValue (SelfEdgeBlockIdx, BCBlockPhi);
7624
- Instruction *LoopExitInst = RdxDesc.getLoopExitInstr ();
7625
- OrigPhi->setIncomingValue (IncomingEdgeBlockIdx, LoopExitInst);
7619
+ assert (Updated && " Must update EpiResumePhi." );
7620
+ (void )Updated; // Keeps Updated "used" when assert() compiles to nothing.
7626
7621
}
7627
7622
7628
7623
DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan (
@@ -7713,11 +7708,11 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7713
7708
// 2.5 When vectorizing the epilogue loop, fix up the reduction resume values.
7714
7709
auto *ExitVPBB =
7715
7710
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7716
- for (VPRecipeBase &R : *ExitVPBB) {
7717
- createAndCollectMergePhiForReduction (
7718
- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7719
- State.CFG .VPBB2IRBB [ExitVPBB], VectorizingEpilogue );
7720
- }
7711
+ if (VectorizingEpilogue)
7712
+ for (VPRecipeBase &R : *ExitVPBB) {
7713
+ fixReductionScalarResumeWhenVectorizingEpilog (
7714
+ &R, State, State .CFG .VPBB2IRBB [ExitVPBB]);
7715
+ }
7721
7716
7722
7717
// 2.6. Maintain Loop Hints
7723
7718
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -9518,6 +9513,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9518
9513
});
9519
9514
FinalReductionResult->insertBefore (*MiddleVPBB, IP);
9520
9515
9516
+ // Order is strict: if there are multiple successors, the first is the exit
9517
+ // block, second is the scalar preheader.
9518
+ VPBasicBlock *ScalarPHVPBB =
9519
+ cast<VPBasicBlock>(MiddleVPBB->getSuccessors ().back ());
9520
+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
9521
+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
9522
+ VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue ()},
9523
+ {}, " bc.merge.rdx" );
9524
+ auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr ());
9525
+ Plan->addLiveOut (RedPhi, ResumePhiRecipe);
9526
+
9521
9527
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9522
9528
// with a boolean reduction phi node to check if the condition is true in
9523
9529
// any iteration. The final value is selected by the final
0 commit comments