@@ -779,10 +779,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
779
779
BasicBlock *emitIterationCountCheck (BasicBlock *Bypass, bool ForEpilogue);
780
780
void printDebugTracesAtStart () override ;
781
781
void printDebugTracesAtEnd () override ;
782
-
783
- void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
784
- Value *VectorTripCount, BasicBlock *MiddleBlock,
785
- VPlan &Plan, VPTransformState &State) override {};
786
782
};
787
783
788
784
// A specialized derived class of inner loop vectorizer that performs
@@ -2697,87 +2693,6 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
2697
2693
return {LoopVectorPreHeader, nullptr };
2698
2694
}
2699
2695
2700
- // Fix up external users of the induction variable. At this point, we are
2701
- // in LCSSA form, with all external PHIs that use the IV having one input value,
2702
- // coming from the remainder loop. We need those PHIs to also have a correct
2703
- // value for the IV when arriving directly from the middle block.
2704
- void InnerLoopVectorizer::fixupIVUsers (PHINode *OrigPhi,
2705
- const InductionDescriptor &II,
2706
- Value *VectorTripCount,
2707
- BasicBlock *MiddleBlock, VPlan &Plan,
2708
- VPTransformState &State) {
2709
- // There are two kinds of external IV usages - those that use the value
2710
- // computed in the last iteration (the PHI) and those that use the penultimate
2711
- // value (the value that feeds into the phi from the loop latch).
2712
- // We allow both, but they, obviously, have different values.
2713
-
2714
- assert (OrigLoop->getUniqueExitBlock () && " Expected a single exit block" );
2715
-
2716
- DenseMap<Value *, Value *> MissingVals;
2717
-
2718
- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock (
2719
- OrigLoop->getLoopPreheader ()))
2720
- ->getIncomingValueForBlock (MiddleBlock);
2721
-
2722
- // An external user of the last iteration's value should see the value that
2723
- // the remainder loop uses to initialize its own IV.
2724
- Value *PostInc = OrigPhi->getIncomingValueForBlock (OrigLoop->getLoopLatch ());
2725
- for (User *U : PostInc->users ()) {
2726
- Instruction *UI = cast<Instruction>(U);
2727
- if (!OrigLoop->contains (UI)) {
2728
- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2729
- MissingVals[UI] = EndValue;
2730
- }
2731
- }
2732
-
2733
- // An external user of the penultimate value need to see EndValue - Step.
2734
- // The simplest way to get this is to recompute it from the constituent SCEVs,
2735
- // that is Start + (Step * (CRD - 1)).
2736
- for (User *U : OrigPhi->users ()) {
2737
- auto *UI = cast<Instruction>(U);
2738
- if (!OrigLoop->contains (UI)) {
2739
- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2740
- IRBuilder<> B (MiddleBlock->getTerminator ());
2741
-
2742
- // Fast-math-flags propagate from the original induction instruction.
2743
- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
2744
- B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
2745
-
2746
- VPValue *StepVPV = Plan.getSCEVExpansion (II.getStep ());
2747
- assert (StepVPV && " step must have been expanded during VPlan execution" );
2748
- Value *Step = StepVPV->isLiveIn () ? StepVPV->getLiveInIRValue ()
2749
- : State.get (StepVPV, VPLane (0 ));
2750
- Value *Escape = nullptr ;
2751
- if (EndValue->getType ()->isIntegerTy ())
2752
- Escape = B.CreateSub (EndValue, Step);
2753
- else if (EndValue->getType ()->isPointerTy ())
2754
- Escape = B.CreatePtrAdd (EndValue, B.CreateNeg (Step));
2755
- else if (EndValue->getType ()->isFloatingPointTy ()) {
2756
- Escape = B.CreateBinOp (II.getInductionBinOp ()->getOpcode () ==
2757
- Instruction::FAdd
2758
- ? Instruction::FSub
2759
- : Instruction::FAdd,
2760
- EndValue, Step);
2761
- } else {
2762
- llvm_unreachable (" all possible induction types must be handled" );
2763
- }
2764
- Escape->setName (" ind.escape" );
2765
- MissingVals[UI] = Escape;
2766
- }
2767
- }
2768
-
2769
- for (auto &I : MissingVals) {
2770
- PHINode *PHI = cast<PHINode>(I.first );
2771
- // One corner case we have to handle is two IVs "chasing" each-other,
2772
- // that is %IV2 = phi [...], [ %IV1, %latch ]
2773
- // In this case, if IV1 has an external use, we need to avoid adding both
2774
- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2775
- // don't already have an incoming value for the middle block.
2776
- if (PHI->getBasicBlockIndex (MiddleBlock) == -1 )
2777
- PHI->addIncoming (I.second , MiddleBlock);
2778
- }
2779
- }
2780
-
2781
2696
namespace {
2782
2697
2783
2698
struct CSEDenseMapInfo {
@@ -2907,25 +2822,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
2907
2822
for (PHINode &PN : Exit->phis ())
2908
2823
PSE.getSE ()->forgetLcssaPhiWithNewPredecessor (OrigLoop, &PN);
2909
2824
2910
- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2911
- // No edge from the middle block to the unique exit block has been inserted
2912
- // and there is nothing to fix from vector loop; phis should have incoming
2913
- // from scalar loop only.
2914
- } else {
2915
- // TODO: Check VPLiveOuts to see if IV users need fixing instead of checking
2916
- // the cost model.
2917
-
2918
- // If we inserted an edge from the middle block to the unique exit block,
2919
- // update uses outside the loop (phis) to account for the newly inserted
2920
- // edge.
2921
-
2922
- // Fix-up external users of the induction variables.
2923
- for (const auto &Entry : Legal->getInductionVars ())
2924
- fixupIVUsers (Entry.first , Entry.second ,
2925
- getOrCreateVectorTripCount (nullptr ), LoopMiddleBlock, Plan,
2926
- State);
2927
- }
2928
-
2929
2825
for (Instruction *PI : PredicatedInstructions)
2930
2826
sinkScalarOperands (&*PI);
2931
2827
@@ -8821,7 +8717,7 @@ addUsersInExitBlock(VPlan &Plan,
8821
8717
}
8822
8718
}
8823
8719
8824
- static void addResumeValuesForInductions (VPlan &Plan) {
8720
+ static void addResumeValuesForInductions (VPlan &Plan, Loop *OrigLoop ) {
8825
8721
VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
8826
8722
VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
8827
8723
@@ -8870,9 +8766,11 @@ static void addResumeValuesForInductions(VPlan &Plan) {
8870
8766
cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
8871
8767
8872
8768
VPBasicBlock *ScalarPHVPBB = nullptr ;
8769
+ VPBasicBlock *ExitVPBB = nullptr ;
8873
8770
if (MiddleVPBB->getNumSuccessors () == 2 ) {
8874
8771
// Order is strict: first is the exit block, second is the scalar
8875
8772
// preheader.
8773
+ ExitVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[0 ]);
8876
8774
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
8877
8775
} else {
8878
8776
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
@@ -8886,6 +8784,53 @@ static void addResumeValuesForInductions(VPlan &Plan) {
8886
8784
auto *ScalarLoopHeader =
8887
8785
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ());
8888
8786
addOperandToPhiInVPIRBasicBlock (ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
8787
+
8788
+ if (ExitVPBB) {
8789
+ // Add an operand for the middle-block edge to each exit-block phi that uses this induction (replaces the removed fixupIVUsers).
8790
+ Value *PostInc =
8791
+ OrigPhi->getIncomingValueForBlock (OrigLoop->getLoopLatch ());
8792
+ for (auto &R : *ExitVPBB) {
8793
+ auto *VPIRInst = cast<VPIRInstruction>(&R);
8794
+ auto *IRI = &VPIRInst->getInstruction ();
8795
+ if (!isa<PHINode>(IRI)) // stop at the first non-phi instruction of the exit block
8796
+ break ;
8797
+ // Skip phi nodes already updated. This can be the case if 2 induction
8798
+ // phis chase each other.
8799
+ if (VPIRInst->getNumOperands () == 1 )
8800
+ continue ;
8801
+ if (any_of (IRI->operands (),
8802
+ [PostInc](Value *Op) { return Op == PostInc; })) {
8803
+ VPIRInst->addOperand (EndValue); // user of the latch (post-increment) value sees EndValue
8804
+ continue ;
8805
+ }
8806
+ // A user of the phi itself needs the value one step before EndValue, i.e. EndValue - Step.
8807
+ if (any_of (IRI->operands (),
8808
+ [OrigPhi](Value *Op) { return Op == OrigPhi; })) {
8809
+ VPBuilder B (MiddleVPBB->getTerminator ());
8810
+ VPValue *Escape = nullptr ;
8811
+ if (ScalarTy->isIntegerTy ())
8812
+ Escape = B.createNaryOp (Instruction::Sub, {EndValue, Step});
8813
+ else if (ScalarTy->isPointerTy ()) // step back by adding the negated offset
8814
+ Escape = B.createPtrAdd (
8815
+ EndValue,
8816
+ B.createNaryOp (
8817
+ Instruction::Sub, // 0 - Step; Xor with -1 would yield -Step - 1
8818
+ {Plan.getOrAddLiveIn (ConstantInt::get (
8819
+ Step->getLiveInIRValue ()->getType (), 0 )), Step}));
8820
+ else if (ScalarTy->isFloatingPointTy ()) {
8821
+ Escape = B.createNaryOp (
8822
+ ID->getInductionBinOp ()->getOpcode () == Instruction::FAdd
8823
+ ? Instruction::FSub
8824
+ : Instruction::FAdd,
8825
+ {EndValue, Step},
8826
+ {ID->getInductionBinOp ()->getFastMathFlags ()});
8827
+ } else {
8828
+ llvm_unreachable (" all possible induction types must be handled" );
8829
+ }
8830
+ VPIRInst->addOperand (Escape);
8831
+ }
8832
+ }
8833
+ }
8889
8834
}
8890
8835
}
8891
8836
@@ -9199,7 +9144,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9199
9144
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9200
9145
addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9201
9146
addUsersInExitBlock (*Plan, ExitUsersToFix);
9202
- addResumeValuesForInductions (*Plan);
9147
+ addResumeValuesForInductions (*Plan, OrigLoop );
9203
9148
9204
9149
// ---------------------------------------------------------------------------
9205
9150
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9305,7 +9250,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9305
9250
bool HasNUW = true ;
9306
9251
addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW,
9307
9252
DebugLoc ());
9308
- addResumeValuesForInductions (*Plan);
9253
+ addResumeValuesForInductions (*Plan, OrigLoop );
9309
9254
assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
9310
9255
return Plan;
9311
9256
}
0 commit comments