@@ -542,11 +542,6 @@ class InnerLoopVectorizer {
542
542
protected:
543
543
friend class LoopVectorizationPlanner ;
544
544
545
- // / Set up the values of the IVs correctly when exiting the vector loop.
546
- virtual void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
547
- Value *VectorTripCount, BasicBlock *MiddleBlock,
548
- VPTransformState &State);
549
-
550
545
// / Iteratively sink the scalarized operands of a predicated instruction into
551
546
// / the block that was created for it.
552
547
void sinkScalarOperands (Instruction *PredInst);
@@ -775,10 +770,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
775
770
BasicBlock *emitIterationCountCheck (BasicBlock *Bypass, bool ForEpilogue);
776
771
void printDebugTracesAtStart () override ;
777
772
void printDebugTracesAtEnd () override ;
778
-
779
- void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
780
- Value *VectorTripCount, BasicBlock *MiddleBlock,
781
- VPTransformState &State) override {};
782
773
};
783
774
784
775
// A specialized derived class of inner loop vectorizer that performs
@@ -2751,97 +2742,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
2751
2742
return LoopVectorPreHeader;
2752
2743
}
2753
2744
2754
- // Fix up external users of the induction variable. At this point, we are
2755
- // in LCSSA form, with all external PHIs that use the IV having one input value,
2756
- // coming from the remainder loop. We need those PHIs to also have a correct
2757
- // value for the IV when arriving directly from the middle block.
2758
- void InnerLoopVectorizer::fixupIVUsers (PHINode *OrigPhi,
2759
- const InductionDescriptor &II,
2760
- Value *VectorTripCount,
2761
- BasicBlock *MiddleBlock,
2762
- VPTransformState &State) {
2763
- // There are two kinds of external IV usages - those that use the value
2764
- // computed in the last iteration (the PHI) and those that use the penultimate
2765
- // value (the value that feeds into the phi from the loop latch).
2766
- // We allow both, but they, obviously, have different values.
2767
-
2768
- DenseMap<Value *, Value *> MissingVals;
2769
-
2770
- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock (
2771
- OrigLoop->getLoopPreheader ()))
2772
- ->getIncomingValueForBlock (MiddleBlock);
2773
-
2774
- // An external user of the last iteration's value should see the value that
2775
- // the remainder loop uses to initialize its own IV.
2776
- Value *PostInc = OrigPhi->getIncomingValueForBlock (OrigLoop->getLoopLatch ());
2777
- for (User *U : PostInc->users ()) {
2778
- Instruction *UI = cast<Instruction>(U);
2779
- if (!OrigLoop->contains (UI)) {
2780
- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2781
- MissingVals[UI] = EndValue;
2782
- }
2783
- }
2784
-
2785
- // An external user of the penultimate value need to see EndValue - Step.
2786
- // The simplest way to get this is to recompute it from the constituent SCEVs,
2787
- // that is Start + (Step * (CRD - 1)).
2788
- for (User *U : OrigPhi->users ()) {
2789
- auto *UI = cast<Instruction>(U);
2790
- if (!OrigLoop->contains (UI)) {
2791
- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2792
- IRBuilder<> B (MiddleBlock->getTerminator ());
2793
-
2794
- // Fast-math-flags propagate from the original induction instruction.
2795
- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
2796
- B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
2797
-
2798
- VPValue *StepVPV = Plan.getSCEVExpansion (II.getStep ());
2799
- assert (StepVPV && " step must have been expanded during VPlan execution" );
2800
- Value *Step = StepVPV->isLiveIn () ? StepVPV->getLiveInIRValue ()
2801
- : State.get (StepVPV, VPLane (0 ));
2802
- Value *Escape = nullptr ;
2803
- if (EndValue->getType ()->isIntegerTy ())
2804
- Escape = B.CreateSub (EndValue, Step);
2805
- else if (EndValue->getType ()->isPointerTy ())
2806
- Escape = B.CreatePtrAdd (EndValue, B.CreateNeg (Step));
2807
- else {
2808
- assert (EndValue->getType ()->isFloatingPointTy () &&
2809
- " Unexpected induction type" );
2810
- Escape = B.CreateBinOp (II.getInductionBinOp ()->getOpcode () ==
2811
- Instruction::FAdd
2812
- ? Instruction::FSub
2813
- : Instruction::FAdd,
2814
- EndValue, Step);
2815
- }
2816
- Escape->setName (" ind.escape" );
2817
- MissingVals[UI] = Escape;
2818
- }
2819
- }
2820
-
2821
- assert ((MissingVals.empty () ||
2822
- all_of (MissingVals,
2823
- [MiddleBlock, this ](const std::pair<Value *, Value *> &P) {
2824
- return all_of (
2825
- predecessors (cast<Instruction>(P.first )->getParent ()),
2826
- [MiddleBlock, this ](BasicBlock *Pred) {
2827
- return Pred == MiddleBlock ||
2828
- Pred == OrigLoop->getLoopLatch ();
2829
- });
2830
- })) &&
2831
- " Expected escaping values from latch/middle.block only" );
2832
-
2833
- for (auto &I : MissingVals) {
2834
- PHINode *PHI = cast<PHINode>(I.first );
2835
- // One corner case we have to handle is two IVs "chasing" each-other,
2836
- // that is %IV2 = phi [...], [ %IV1, %latch ]
2837
- // In this case, if IV1 has an external use, we need to avoid adding both
2838
- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2839
- // don't already have an incoming value for the middle block.
2840
- if (PHI->getBasicBlockIndex (MiddleBlock) == -1 )
2841
- PHI->addIncoming (I.second , MiddleBlock);
2842
- }
2843
- }
2844
-
2845
2745
namespace {
2846
2746
2847
2747
struct CSEDenseMapInfo {
@@ -2986,24 +2886,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
2986
2886
for (PHINode &PN : Exit->phis ())
2987
2887
PSE.getSE ()->forgetLcssaPhiWithNewPredecessor (OrigLoop, &PN);
2988
2888
2989
- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2990
- // No edge from the middle block to the unique exit block has been inserted
2991
- // and there is nothing to fix from vector loop; phis should have incoming
2992
- // from scalar loop only.
2993
- } else {
2994
- // TODO: Check in VPlan to see if IV users need fixing instead of checking
2995
- // the cost model.
2996
-
2997
- // If we inserted an edge from the middle block to the unique exit block,
2998
- // update uses outside the loop (phis) to account for the newly inserted
2999
- // edge.
3000
-
3001
- // Fix-up external users of the induction variables.
3002
- for (const auto &Entry : Legal->getInductionVars ())
3003
- fixupIVUsers (Entry.first , Entry.second ,
3004
- getOrCreateVectorTripCount (nullptr ), LoopMiddleBlock, State);
3005
- }
3006
-
3007
2889
for (Instruction *PI : PredicatedInstructions)
3008
2890
sinkScalarOperands (&*PI);
3009
2891
@@ -8857,11 +8739,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8857
8739
// / Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
8858
8740
// / induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
8859
8741
// / the end value of the induction.
8860
- static VPValue *addResumePhiRecipeForInduction (VPWidenInductionRecipe *WideIV,
8861
- VPBuilder &VectorPHBuilder,
8862
- VPBuilder &ScalarPHBuilder,
8863
- VPTypeAnalysis &TypeInfo,
8864
- VPValue *VectorTC) {
8742
+ static VPValue *addResumePhiRecipeForInduction (
8743
+ VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8744
+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
8745
+ DenseMap<VPValue *, VPValue *> &EndValues) {
8865
8746
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
8866
8747
// Truncated wide inductions resume from the last lane of their vector value
8867
8748
// in the last vector iteration which is handled elsewhere.
@@ -8886,6 +8767,7 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
8886
8767
ScalarTypeOfWideIV);
8887
8768
}
8888
8769
8770
+ EndValues[WideIV] = EndValue;
8889
8771
auto *ResumePhiRecipe =
8890
8772
ScalarPHBuilder.createNaryOp (VPInstruction::ResumePhi, {EndValue, Start},
8891
8773
WideIV->getDebugLoc (), " bc.resume.val" );
@@ -8895,7 +8777,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
8895
8777
// / Create resume phis in the scalar preheader for first-order recurrences,
8896
8778
// / reductions and inductions, and update the VPIRInstructions wrapping the
8897
8779
// / original phis in the scalar header.
8898
- static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan) {
8780
+ static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan,
8781
+ Loop *OrigLoop,
8782
+ DenseMap<VPValue *, VPValue *> &EndValues) {
8899
8783
VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
8900
8784
auto *ScalarPH = Plan.getScalarPreheader ();
8901
8785
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor ());
@@ -8915,7 +8799,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8915
8799
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
8916
8800
if (VPValue *ResumePhi = addResumePhiRecipeForInduction (
8917
8801
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8918
- &Plan.getVectorTripCount ())) {
8802
+ &Plan.getVectorTripCount (), EndValues )) {
8919
8803
ScalarPhiIRI->addOperand (ResumePhi);
8920
8804
continue ;
8921
8805
}
@@ -8949,9 +8833,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8949
8833
// modeled explicitly yet and won't be included. Those are un-truncated
8950
8834
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
8951
8835
// increments.
8952
- static SetVector<VPIRInstruction *> collectUsersInExitBlocks (
8953
- Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan ,
8954
- const MapVector<PHINode *, InductionDescriptor> &Inductions ) {
8836
+ static SetVector<VPIRInstruction *>
8837
+ collectUsersInExitBlocks ( Loop *OrigLoop, VPRecipeBuilder &Builder,
8838
+ VPlan &Plan ) {
8955
8839
auto *MiddleVPBB = Plan.getMiddleBlock ();
8956
8840
SetVector<VPIRInstruction *> ExitUsersToFix;
8957
8841
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks ()) {
@@ -8976,18 +8860,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8976
8860
// Exit values for inductions are not special-cased here: their users are
8977
8861
// collected like any other exit user and are rewritten later, when the exit
8978
8862
// users are fixed up (see addInductionEndValue).
8979
- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8980
- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
8981
- isa<VPWidenPointerInductionRecipe>(V) ||
8982
- (isa<Instruction>(IncomingValue) &&
8983
- OrigLoop->contains (cast<Instruction>(IncomingValue)) &&
8984
- any_of (IncomingValue->users (), [&Inductions](User *U) {
8985
- auto *P = dyn_cast<PHINode>(U);
8986
- return P && Inductions.contains (P);
8987
- }))) {
8988
- if (ExitVPBB->getSinglePredecessor () == MiddleVPBB)
8989
- continue ;
8990
- }
8991
8863
ExitUsersToFix.insert (ExitIRI);
8992
8864
ExitIRI->addOperand (V);
8993
8865
}
@@ -8996,17 +8868,86 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8996
8868
return ExitUsersToFix;
8997
8869
}
8998
8870
8871
+ // / If \p Incoming is a user of a non-truncated induction, create recipes to
8872
+ // / compute the final value and update the user \p ExitIRI.
8873
+ static bool addInductionEndValue (
8874
+ VPlan &Plan, VPIRInstruction *ExitIRI, VPValue *Incoming,
8875
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
8876
+ DenseMap<VPValue *, VPValue *> &EndValues, VPTypeAnalysis &TypeInfo) {
8877
+ if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
8878
+ !cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst ()) ||
8879
+ isa<VPWidenPointerInductionRecipe>(Incoming) ||
8880
+ (isa<Instruction>(Incoming->getUnderlyingValue ()) &&
8881
+ any_of (cast<Instruction>(Incoming->getUnderlyingValue ())->users (),
8882
+ [&Inductions](User *U) {
8883
+ auto *P = dyn_cast<PHINode>(U);
8884
+ return P && Inductions.contains (P);
8885
+ }))) {
8886
+ VPValue *IV;
8887
+ if (auto *WideIV =
8888
+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe ()))
8889
+ IV = WideIV;
8890
+ else if (auto *WideIV =
8891
+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe ()
8892
+ ->getOperand (0 )
8893
+ ->getDefiningRecipe ()))
8894
+ IV = WideIV;
8895
+ else
8896
+ IV = Incoming->getDefiningRecipe ()->getOperand (1 );
8897
+ // Skip phi nodes already updated. This can be the case if 2 induction
8898
+ // phis chase each other.
8899
+ VPValue *EndValue = EndValues[IV];
8900
+ if (any_of (cast<VPRecipeBase>(Incoming->getDefiningRecipe ())->operands (),
8901
+ IsaPred<VPWidenIntOrFpInductionRecipe,
8902
+ VPWidenPointerInductionRecipe>)) {
8903
+ ExitIRI->setOperand (0 , EndValue);
8904
+ return true ;
8905
+ }
8906
+
8907
+ VPBuilder B (Plan.getMiddleBlock ()->getTerminator ());
8908
+ VPValue *Escape = nullptr ;
8909
+ auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe ());
8910
+ VPValue *Step = WideIV->getStepValue ();
8911
+ Type *ScalarTy = TypeInfo.inferScalarType (WideIV);
8912
+ if (ScalarTy->isIntegerTy ())
8913
+ Escape =
8914
+ B.createNaryOp (Instruction::Sub, {EndValue, Step}, {}, " ind.escape" );
8915
+ else if (ScalarTy->isPointerTy ())
8916
+ Escape = B.createPtrAdd (
8917
+ EndValue,
8918
+ B.createNaryOp (Instruction::Sub,
8919
+ {Plan.getOrAddLiveIn (ConstantInt::get (
8920
+ Step->getLiveInIRValue ()->getType (), 0 )),
8921
+ Step}),
8922
+ {}, " ind.escape" );
8923
+ else if (ScalarTy->isFloatingPointTy ()) {
8924
+ const auto &ID = WideIV->getInductionDescriptor ();
8925
+ Escape = B.createNaryOp (
8926
+ ID.getInductionBinOp ()->getOpcode () == Instruction::FAdd
8927
+ ? Instruction::FSub
8928
+ : Instruction::FAdd,
8929
+ {EndValue, Step}, {ID.getInductionBinOp ()->getFastMathFlags ()});
8930
+ } else {
8931
+ llvm_unreachable (" all possible induction types must be handled" );
8932
+ }
8933
+ ExitIRI->setOperand (0 , Escape);
8934
+ return true ;
8935
+ }
8936
+ return false ;
8937
+ }
8999
8938
// Add exit values to \p Plan. Exit users of inductions are updated via
9000
8939
// addInductionEndValue; for the other entries in \p ExitUsersToFix, extracts
9001
8940
// are added if needed and their operands are updated. Returns true if all
// exit users can be handled, otherwise returns false.
9002
- static bool
9003
- addUsersInExitBlocks (VPlan &Plan,
9004
- const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8941
+ static bool addUsersInExitBlocks (
8942
+ VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
8943
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
8944
+ DenseMap<VPValue *, VPValue *> &EndValues) {
9005
8945
if (ExitUsersToFix.empty ())
9006
8946
return true ;
9007
8947
9008
8948
auto *MiddleVPBB = Plan.getMiddleBlock ();
9009
8949
VPBuilder B (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
8950
+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
9010
8951
9011
8952
// Introduce extract for exiting values and update the VPIRInstructions
9012
8953
// modeling the corresponding LCSSA phis.
@@ -9022,11 +8963,16 @@ addUsersInExitBlocks(VPlan &Plan,
9022
8963
if (ExitIRI->getParent ()->getSinglePredecessor () != MiddleVPBB)
9023
8964
return false ;
9024
8965
8966
+ VPValue *Incoming = ExitIRI->getOperand (0 );
8967
+ if (addInductionEndValue (Plan, ExitIRI, Incoming, Inductions, EndValues,
8968
+ TypeInfo))
8969
+ continue ;
8970
+
9025
8971
LLVMContext &Ctx = ExitIRI->getInstruction ().getContext ();
9026
8972
VPValue *Ext = B.createNaryOp (VPInstruction::ExtractFromEnd,
9027
8973
{Op, Plan.getOrAddLiveIn (ConstantInt::get (
9028
8974
IntegerType::get (Ctx, 32 ), 1 ))});
9029
- ExitIRI->setOperand (Idx , Ext);
8975
+ ExitIRI->setOperand (0 , Ext);
9030
8976
}
9031
8977
}
9032
8978
return true ;
@@ -9307,11 +9253,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9307
9253
VPlanTransforms::handleUncountableEarlyExit (
9308
9254
*Plan, *PSE.getSE (), OrigLoop, UncountableExitingBlock, RecipeBuilder);
9309
9255
}
9310
- addScalarResumePhis (RecipeBuilder, *Plan);
9311
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks (
9312
- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9256
+ DenseMap<VPValue *, VPValue *> EndValues;
9257
+ addScalarResumePhis (RecipeBuilder, *Plan, OrigLoop, EndValues);
9258
+ SetVector<VPIRInstruction *> ExitUsersToFix =
9259
+ collectUsersInExitBlocks (OrigLoop, RecipeBuilder, *Plan);
9313
9260
addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9314
- if (!addUsersInExitBlocks (*Plan, ExitUsersToFix)) {
9261
+ if (!addUsersInExitBlocks (*Plan, ExitUsersToFix, Legal->getInductionVars (),
9262
+ EndValues)) {
9315
9263
reportVectorizationFailure (
9316
9264
" Some exit values in loop with uncountable exit not supported yet" ,
9317
9265
" UncountableEarlyExitLoopsUnsupportedExitValue" , ORE, OrigLoop);
@@ -9438,7 +9386,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9438
9386
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
9439
9387
RecipeBuilder.setRecipe (HeaderR->getUnderlyingInstr (), HeaderR);
9440
9388
}
9441
- addScalarResumePhis (RecipeBuilder, *Plan);
9389
+ DenseMap<VPValue *, VPValue *> EndValues;
9390
+ addScalarResumePhis (RecipeBuilder, *Plan, OrigLoop, EndValues);
9442
9391
9443
9392
assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
9444
9393
return Plan;
0 commit comments