@@ -7659,14 +7659,17 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
7659
7659
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind (
7660
7660
RdxDesc.getRecurrenceKind ())) {
7661
7661
using namespace llvm ::PatternMatch;
7662
- Value *Cmp, *OrigResumeV;
7662
+ Value *Cmp, *OrigResumeV, *CmpOp ;
7663
7663
bool IsExpectedPattern =
7664
7664
match (MainResumeValue, m_Select (m_OneUse (m_Value (Cmp)),
7665
7665
m_Specific (RdxDesc.getSentinelValue ()),
7666
7666
m_Value (OrigResumeV))) &&
7667
- match (Cmp,
7668
- m_SpecificICmp (ICmpInst::ICMP_EQ, m_Specific (OrigResumeV),
7669
- m_Specific (RdxDesc.getRecurrenceStartValue ())));
7667
+ (match (Cmp, m_SpecificICmp (ICmpInst::ICMP_EQ, m_Specific (OrigResumeV),
7668
+ m_Value (CmpOp))) &&
7669
+ (match (CmpOp,
7670
+ m_Freeze (m_Specific (RdxDesc.getRecurrenceStartValue ()))) ||
7671
+ (CmpOp == RdxDesc.getRecurrenceStartValue () &&
7672
+ isGuaranteedNotToBeUndefOrPoison (CmpOp))));
7670
7673
assert (IsExpectedPattern && " Unexpected reduction resume pattern" );
7671
7674
(void )IsExpectedPattern;
7672
7675
MainResumeValue = OrigResumeV;
@@ -10374,6 +10377,36 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
10374
10377
VPlanTransforms::runPass (VPlanTransforms::removeDeadRecipes, MainPlan);
10375
10378
10376
10379
using namespace VPlanPatternMatch ;
10380
+ // When vectorizing the epilogue, FindLastIV reductions can introduce multiple
10381
+ // uses of undef/poison. If the reduction start value may be undef or poison
10382
+ // it needs to be frozen and the frozen start has to be used when computing
10383
+ // the reduction result. We also need to use the frozen value in the resume
10384
+ // phi generated by the main vector loop, as this is also used to compute the
10385
+ // reduction result after the epilogue vector loop.
10386
// Helper lambda: scans the recipes of Plan's middle block for VPInstructions
// with opcode ComputeFindLastIVResult. For each one whose operand 1 (used here
// as the reduction start value) is not guaranteed to be free of undef/poison,
// it creates a Freeze of that operand and makes the VPInstruction use the
// frozen value instead.
// Parameters:
//   Plan             - the VPlan whose middle block is scanned and updated.
//   UpdateResumePhis - when true, uses of the original start value in
//                      ResumePhi VPInstructions are also redirected to the
//                      frozen value.
+ auto AddFreezeForFindLastIVReductions = [](VPlan &Plan,
10387
+ bool UpdateResumePhis) {
10388
+ VPBuilder Builder (Plan.getEntry ()); // NOTE(review): presumably inserts new recipes into the entry block - confirm
10389
+ for (VPRecipeBase &R : *Plan.getMiddleBlock ()) {
10390
+ auto *VPI = dyn_cast<VPInstruction>(&R);
10391
+ if (!VPI || VPI->getOpcode () != VPInstruction::ComputeFindLastIVResult)
10392
+ continue ; // only ComputeFindLastIVResult instructions are of interest
10393
+ VPValue *OrigStart = VPI->getOperand (1 ); // operand 1 is treated as the start value
10394
+ if (isGuaranteedNotToBeUndefOrPoison (OrigStart->getLiveInIRValue ())) // assumes OrigStart is a live-in - TODO confirm
10395
+ continue ; // start value is already well-defined; no freeze needed
10396
+ VPInstruction *Freeze =
10397
+ Builder.createNaryOp (Instruction::Freeze, {OrigStart}, {}, " fr" );
10398
+ VPI->setOperand (1 , Freeze); // the result computation now reads the frozen start
10399
+ if (UpdateResumePhis)
10400
+ OrigStart->replaceUsesWithIf (Freeze, [Freeze](VPUser &U, unsigned ) { // predicate selects which users are rewritten
10401
+ return Freeze != &U && isa<VPInstruction>(&U) && // never rewrite the Freeze's own operand
10402
+ cast<VPInstruction>(&U)->getOpcode () ==
10403
+ VPInstruction::ResumePhi; // only ResumePhi users switch to the frozen value
10404
+ });
10405
+ }
10406
+ };
10407
+ AddFreezeForFindLastIVReductions (MainPlan, true );
10408
+ AddFreezeForFindLastIVReductions (EpiPlan, false );
10409
+
10377
10410
VPBasicBlock *MainScalarPH = MainPlan.getScalarPreheader ();
10378
10411
VPValue *VectorTC = &MainPlan.getVectorTripCount ();
10379
10412
// If there is a suitable resume value for the canonical induction in the
@@ -10401,24 +10434,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
10401
10434
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock ();
10402
10435
Header->setName (" vec.epilog.vector.body" );
10403
10436
10404
- // Re-use the trip count and steps expanded for the main loop, as
10405
- // skeleton creation needs it as a value that dominates both the scalar
10406
- // and vector epilogue loops
10407
- // TODO: This is a workaround needed for epilogue vectorization and it
10408
- // should be removed once induction resume value creation is done
10409
- // directly in VPlan.
10410
- for (auto &R : make_early_inc_range (*Plan.getEntry ())) {
10411
- auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
10412
- if (!ExpandR)
10413
- continue ;
10414
- auto *ExpandedVal =
10415
- Plan.getOrAddLiveIn (ExpandedSCEVs.find (ExpandR->getSCEV ())->second );
10416
- ExpandR->replaceAllUsesWith (ExpandedVal);
10417
- if (Plan.getTripCount () == ExpandR)
10418
- Plan.resetTripCount (ExpandedVal);
10419
- ExpandR->eraseFromParent ();
10420
- }
10421
-
10437
+ DenseMap<Value *, Value *> ToFrozen;
10422
10438
// Ensure that the start values for all header phi recipes are updated before
10423
10439
// vectorizing the epilogue loop.
10424
10440
for (VPRecipeBase &R : Header->phis ()) {
@@ -10484,6 +10500,10 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
10484
10500
ResumeV =
10485
10501
Builder.CreateICmpNE (ResumeV, RdxDesc.getRecurrenceStartValue ());
10486
10502
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind (RK)) {
10503
+ ToFrozen[RdxDesc.getRecurrenceStartValue ()] =
10504
+ cast<PHINode>(ResumeV)->getIncomingValueForBlock (
10505
+ EPI.MainLoopIterationCountCheck );
10506
+
10487
10507
// VPReductionPHIRecipe for FindLastIV reductions requires an adjustment
10488
10508
// to the resume value. The resume value is adjusted to the sentinel
10489
10509
// value when the final value from the main vector loop equals the start
@@ -10492,8 +10512,8 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
10492
10512
// variable.
10493
10513
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent ();
10494
10514
IRBuilder<> Builder (ResumeBB, ResumeBB->getFirstNonPHIIt ());
10495
- Value *Cmp =
10496
- Builder. CreateICmpEQ ( ResumeV, RdxDesc.getRecurrenceStartValue ());
10515
+ Value *Cmp = Builder. CreateICmpEQ (
10516
+ ResumeV, ToFrozen[ RdxDesc.getRecurrenceStartValue ()] );
10497
10517
ResumeV =
10498
10518
Builder.CreateSelect (Cmp, RdxDesc.getSentinelValue (), ResumeV);
10499
10519
}
@@ -10509,6 +10529,35 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
10509
10529
VPValue *StartVal = Plan.getOrAddLiveIn (ResumeV);
10510
10530
cast<VPHeaderPHIRecipe>(&R)->setStartValue (StartVal);
10511
10531
}
10532
+
10533
+ // For some VPValues in the epilogue plan we must re-use the generated IR
10534
+ // values from the main plan. Replace them with live-in VPValues.
10535
+ // TODO: This is a workaround needed for epilogue vectorization and it
10536
+ // should be removed once induction resume value creation is done
10537
+ // directly in VPlan.
10538
+ for (auto &R : make_early_inc_range (*Plan.getEntry ())) {
10539
+ // Re-use frozen values from the main plan for Freeze VPInstructions in the
10540
+ // epilogue plan. This ensures all users use the same frozen value.
10541
+ auto *VPI = dyn_cast<VPInstruction>(&R);
10542
+ if (VPI && VPI->getOpcode () == Instruction::Freeze) {
10543
+ VPI->replaceAllUsesWith (Plan.getOrAddLiveIn (
10544
+ ToFrozen.lookup (VPI->getOperand (0 )->getLiveInIRValue ())));
10545
+ continue ;
10546
+ }
10547
+
10548
+ // Re-use the trip count and steps expanded for the main loop, as
10549
+ // skeleton creation needs them as values that dominate both the scalar
10550
+ // and vector epilogue loops.
10551
+ auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
10552
+ if (!ExpandR)
10553
+ continue ;
10554
+ auto *ExpandedVal =
10555
+ Plan.getOrAddLiveIn (ExpandedSCEVs.find (ExpandR->getSCEV ())->second );
10556
+ ExpandR->replaceAllUsesWith (ExpandedVal);
10557
+ if (Plan.getTripCount () == ExpandR)
10558
+ Plan.resetTripCount (ExpandedVal);
10559
+ ExpandR->eraseFromParent ();
10560
+ }
10512
10561
}
10513
10562
10514
10563
// Generate bypass values from the additional bypass block. Note that when the
0 commit comments