@@ -552,11 +552,6 @@ class InnerLoopVectorizer {
552
552
protected:
553
553
friend class LoopVectorizationPlanner ;
554
554
555
- // / Set up the values of the IVs correctly when exiting the vector loop.
556
- virtual void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
557
- Value *VectorTripCount, BasicBlock *MiddleBlock,
558
- VPTransformState &State);
559
-
560
555
// / Iteratively sink the scalarized operands of a predicated instruction into
561
556
// / the block that was created for it.
562
557
void sinkScalarOperands (Instruction *PredInst);
@@ -785,10 +780,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
785
780
BasicBlock *emitIterationCountCheck (BasicBlock *Bypass, bool ForEpilogue);
786
781
void printDebugTracesAtStart () override ;
787
782
void printDebugTracesAtEnd () override ;
788
-
789
- void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
790
- Value *VectorTripCount, BasicBlock *MiddleBlock,
791
- VPTransformState &State) override {};
792
783
};
793
784
794
785
// A specialized derived class of inner loop vectorizer that performs
@@ -2768,88 +2759,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
2768
2759
return LoopVectorPreHeader;
2769
2760
}
2770
2761
2771
- // Fix up external users of the induction variable. At this point, we are
2772
- // in LCSSA form, with all external PHIs that use the IV having one input value,
2773
- // coming from the remainder loop. We need those PHIs to also have a correct
2774
- // value for the IV when arriving directly from the middle block.
2775
- void InnerLoopVectorizer::fixupIVUsers (PHINode *OrigPhi,
2776
- const InductionDescriptor &II,
2777
- Value *VectorTripCount,
2778
- BasicBlock *MiddleBlock,
2779
- VPTransformState &State) {
2780
- // There are two kinds of external IV usages - those that use the value
2781
- // computed in the last iteration (the PHI) and those that use the penultimate
2782
- // value (the value that feeds into the phi from the loop latch).
2783
- // We allow both, but they, obviously, have different values.
2784
-
2785
- DenseMap<Value *, Value *> MissingVals;
2786
-
2787
- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock (
2788
- OrigLoop->getLoopPreheader ()))
2789
- ->getIncomingValueForBlock (MiddleBlock);
2790
-
2791
- // An external user of the last iteration's value should see the value that
2792
- // the remainder loop uses to initialize its own IV.
2793
- Value *PostInc = OrigPhi->getIncomingValueForBlock (OrigLoop->getLoopLatch ());
2794
- for (User *U : PostInc->users ()) {
2795
- Instruction *UI = cast<Instruction>(U);
2796
- if (!OrigLoop->contains (UI)) {
2797
- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2798
- MissingVals[UI] = EndValue;
2799
- }
2800
- }
2801
-
2802
- // An external user of the penultimate value need to see EndValue - Step.
2803
- // The simplest way to get this is to recompute it from the constituent SCEVs,
2804
- // that is Start + (Step * (CRD - 1)).
2805
- for (User *U : OrigPhi->users ()) {
2806
- auto *UI = cast<Instruction>(U);
2807
- if (!OrigLoop->contains (UI)) {
2808
- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2809
- IRBuilder<> B (MiddleBlock->getTerminator ());
2810
-
2811
- // Fast-math-flags propagate from the original induction instruction.
2812
- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
2813
- B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
2814
-
2815
- VPValue *StepVPV = Plan.getSCEVExpansion (II.getStep ());
2816
- assert (StepVPV && " step must have been expanded during VPlan execution" );
2817
- Value *Step = StepVPV->isLiveIn () ? StepVPV->getLiveInIRValue ()
2818
- : State.get (StepVPV, VPLane (0 ));
2819
- Value *Escape = nullptr ;
2820
- if (EndValue->getType ()->isIntegerTy ())
2821
- Escape = B.CreateSub (EndValue, Step);
2822
- else if (EndValue->getType ()->isPointerTy ())
2823
- Escape = B.CreatePtrAdd (EndValue, B.CreateNeg (Step));
2824
- else {
2825
- assert (EndValue->getType ()->isFloatingPointTy () &&
2826
- " Unexpected induction type" );
2827
- Escape = B.CreateBinOp (II.getInductionBinOp ()->getOpcode () ==
2828
- Instruction::FAdd
2829
- ? Instruction::FSub
2830
- : Instruction::FAdd,
2831
- EndValue, Step);
2832
- }
2833
- Escape->setName (" ind.escape" );
2834
- MissingVals[UI] = Escape;
2835
- }
2836
- }
2837
-
2838
- assert ((MissingVals.empty () || OrigLoop->getUniqueExitBlock ()) &&
2839
- " Expected a single exit block for escaping values" );
2840
-
2841
- for (auto &I : MissingVals) {
2842
- PHINode *PHI = cast<PHINode>(I.first );
2843
- // One corner case we have to handle is two IVs "chasing" each-other,
2844
- // that is %IV2 = phi [...], [ %IV1, %latch ]
2845
- // In this case, if IV1 has an external use, we need to avoid adding both
2846
- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2847
- // don't already have an incoming value for the middle block.
2848
- if (PHI->getBasicBlockIndex (MiddleBlock) == -1 )
2849
- PHI->addIncoming (I.second , MiddleBlock);
2850
- }
2851
- }
2852
-
2853
2762
namespace {
2854
2763
2855
2764
struct CSEDenseMapInfo {
@@ -2978,24 +2887,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
2978
2887
for (PHINode &PN : Exit->phis ())
2979
2888
PSE.getSE ()->forgetLcssaPhiWithNewPredecessor (OrigLoop, &PN);
2980
2889
2981
- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
2982
- // No edge from the middle block to the unique exit block has been inserted
2983
- // and there is nothing to fix from vector loop; phis should have incoming
2984
- // from scalar loop only.
2985
- } else {
2986
- // TODO: Check in VPlan to see if IV users need fixing instead of checking
2987
- // the cost model.
2988
-
2989
- // If we inserted an edge from the middle block to the unique exit block,
2990
- // update uses outside the loop (phis) to account for the newly inserted
2991
- // edge.
2992
-
2993
- // Fix-up external users of the induction variables.
2994
- for (const auto &Entry : Legal->getInductionVars ())
2995
- fixupIVUsers (Entry.first , Entry.second ,
2996
- getOrCreateVectorTripCount (nullptr ), LoopMiddleBlock, State);
2997
- }
2998
-
2999
2890
for (Instruction *PI : PredicatedInstructions)
3000
2891
sinkScalarOperands (&*PI);
3001
2892
@@ -8839,11 +8730,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8839
8730
// / Create a ResumePhi for \p PhiR, if it is a wide induction recipe. If the
8840
8731
// / induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
8841
8732
// / the end value of the induction.
8842
- static VPValue *addResumeValuesForInduction (VPHeaderPHIRecipe *PhiR,
8843
- VPBuilder &VectorPHBuilder,
8844
- VPBuilder &ScalarPHBuilder,
8845
- VPTypeAnalysis &TypeInfo,
8846
- VPValue *VectorTC) {
8733
+ static VPValue *addResumeValuesForInduction (
8734
+ VPHeaderPHIRecipe *PhiR, VPBuilder &VectorPHBuilder,
8735
+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
8736
+ Loop *OrigLoop, DenseMap<VPValue *, VPValue *> &EndValues) {
8847
8737
auto *WideIV = dyn_cast<VPWidenInductionRecipe>(PhiR);
8848
8738
if (!WideIV)
8849
8739
return nullptr ;
@@ -8875,6 +8765,7 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
8875
8765
ScalarTy);
8876
8766
}
8877
8767
8768
+ EndValues[PhiR] = EndValue;
8878
8769
auto *ResumePhiRecipe =
8879
8770
ScalarPHBuilder.createNaryOp (VPInstruction::ResumePhi, {EndValue, Start},
8880
8771
WideIV->getDebugLoc (), " bc.resume.val" );
@@ -8886,7 +8777,8 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
8886
8777
// / original phis in the scalar header.
8887
8778
static void addScalarResumePhis (
8888
8779
VPlan &Plan,
8889
- function_ref<VPHeaderPHIRecipe *(PHINode *)> GetHeaderPhiRecipe) {
8780
+ function_ref<VPHeaderPHIRecipe *(PHINode *)> GetHeaderPhiRecipe,
8781
+ Loop *OrigLoop, DenseMap<VPValue *, VPValue *> &EndValues) {
8890
8782
VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
8891
8783
auto *ScalarPH = Plan.getScalarPreheader ();
8892
8784
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor ());
@@ -8905,7 +8797,7 @@ static void addScalarResumePhis(
8905
8797
8906
8798
if (VPValue *ResumePhi = addResumeValuesForInduction (
8907
8799
VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8908
- &Plan.getVectorTripCount ())) {
8800
+ &Plan.getVectorTripCount (), OrigLoop, EndValues )) {
8909
8801
ScalarPhiIRI->addOperand (ResumePhi);
8910
8802
continue ;
8911
8803
}
@@ -8937,9 +8829,9 @@ static void addScalarResumePhis(
8937
8829
// modeled explicitly yet and won't be included. Those are un-truncated
8938
8830
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
8939
8831
// increments.
8940
- static SetVector<VPIRInstruction *> collectUsersInExitBlocks (
8941
- Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan ,
8942
- const MapVector<PHINode *, InductionDescriptor> &Inductions ) {
8832
+ static SetVector<VPIRInstruction *>
8833
+ collectUsersInExitBlocks ( Loop *OrigLoop, VPRecipeBuilder &Builder,
8834
+ VPlan &Plan ) {
8943
8835
auto *MiddleVPBB = Plan.getMiddleBlock ();
8944
8836
SetVector<VPIRInstruction *> ExitUsersToFix;
8945
8837
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks ()) {
@@ -8964,18 +8856,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8964
8856
// Exit values for inductions are collected here like any other exit user.
8965
8857
// addUsersInExitBlocks later rewrites their operand to the induction's end
8966
8858
// value, or to the end value minus one step ("ind.escape").
8967
- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8968
- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
8969
- isa<VPWidenPointerInductionRecipe>(V) ||
8970
- (isa<Instruction>(IncomingValue) &&
8971
- OrigLoop->contains (cast<Instruction>(IncomingValue)) &&
8972
- any_of (IncomingValue->users (), [&Inductions](User *U) {
8973
- auto *P = dyn_cast<PHINode>(U);
8974
- return P && Inductions.contains (P);
8975
- }))) {
8976
- if (ExitVPBB->getSinglePredecessor () == MiddleVPBB)
8977
- continue ;
8978
- }
8979
8859
ExitUsersToFix.insert (ExitIRI);
8980
8860
ExitIRI->addOperand (V);
8981
8861
}
@@ -8987,14 +8867,16 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8987
8867
// Add exit values to \p Plan. Extracts are added for each entry in \p
8988
8868
// ExitUsersToFix if needed and their operands are updated. Returns true if all
8989
8869
// exit users can be handled, otherwise returns false.
8990
- static bool
8991
- addUsersInExitBlocks (VPlan &Plan,
8992
- const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8870
+ static bool addUsersInExitBlocks (
8871
+ VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
8872
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
8873
+ DenseMap<VPValue *, VPValue *> &EndValues) {
8993
8874
if (ExitUsersToFix.empty ())
8994
8875
return true ;
8995
8876
8996
8877
auto *MiddleVPBB = Plan.getMiddleBlock ();
8997
8878
VPBuilder B (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
8879
+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
8998
8880
8999
8881
// Introduce extract for exiting values and update the VPIRInstructions
9000
8882
// modeling the corresponding LCSSA phis.
@@ -9010,6 +8892,69 @@ addUsersInExitBlocks(VPlan &Plan,
9010
8892
if (ExitIRI->getParent ()->getSinglePredecessor () != MiddleVPBB)
9011
8893
return false ;
9012
8894
8895
+ VPValue *Incoming = ExitIRI->getOperand (0 );
8896
+ if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
8897
+ !cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst ()) ||
8898
+ isa<VPWidenPointerInductionRecipe>(Incoming) ||
8899
+ (isa<Instruction>(Incoming->getUnderlyingValue ()) &&
8900
+ // OrigLoop->contains(cast<Instruction>(Incoming->getUnderlyingValue()))
8901
+ // &&
8902
+ any_of (cast<Instruction>(Incoming->getUnderlyingValue ())->users (),
8903
+ [&Inductions](User *U) {
8904
+ auto *P = dyn_cast<PHINode>(U);
8905
+ return P && Inductions.contains (P);
8906
+ }))) {
8907
+ VPValue *IV;
8908
+ if (auto *WideIV =
8909
+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe ()))
8910
+ IV = WideIV;
8911
+ else if (auto *WideIV = dyn_cast<VPWidenInductionRecipe>(
8912
+ Incoming->getDefiningRecipe ()
8913
+ ->getOperand (0 )
8914
+ ->getDefiningRecipe ()))
8915
+ IV = WideIV;
8916
+ else
8917
+ IV = Incoming->getDefiningRecipe ()->getOperand (1 );
8918
+ // Skip phi nodes already updated. This can be the case if 2 induction
8919
+ // phis chase each other.
8920
+ VPValue *EndValue = EndValues[IV];
8921
+ if (any_of (cast<VPRecipeBase>(Incoming->getDefiningRecipe ())->operands (),
8922
+ IsaPred<VPWidenIntOrFpInductionRecipe,
8923
+ VPWidenPointerInductionRecipe>)) {
8924
+ ExitIRI->setOperand (0 , EndValue);
8925
+ continue ;
8926
+ }
8927
+
8928
+ VPBuilder B (Plan.getMiddleBlock ()->getTerminator ());
8929
+ VPValue *Escape = nullptr ;
8930
+ auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe ());
8931
+ VPValue *Step = WideIV->getStepValue ();
8932
+ Type *ScalarTy = TypeInfo.inferScalarType (WideIV);
8933
+ if (ScalarTy->isIntegerTy ())
8934
+ Escape = B.createNaryOp (Instruction::Sub, {EndValue, Step}, {},
8935
+ " ind.escape" );
8936
+ else if (ScalarTy->isPointerTy ())
8937
+ Escape = B.createPtrAdd (
8938
+ EndValue,
8939
+ B.createNaryOp (Instruction::Sub,
8940
+ {Plan.getOrAddLiveIn (ConstantInt::get (
8941
+ Step->getLiveInIRValue ()->getType (), 0 )),
8942
+ Step}),
8943
+ {}, " ind.escape" );
8944
+ else if (ScalarTy->isFloatingPointTy ()) {
8945
+ const auto &ID = WideIV->getInductionDescriptor ();
8946
+ Escape = B.createNaryOp (
8947
+ ID.getInductionBinOp ()->getOpcode () == Instruction::FAdd
8948
+ ? Instruction::FSub
8949
+ : Instruction::FAdd,
8950
+ {EndValue, Step}, {ID.getInductionBinOp ()->getFastMathFlags ()});
8951
+ } else {
8952
+ llvm_unreachable (" all possible induction types must be handled" );
8953
+ }
8954
+ ExitIRI->setOperand (0 , Escape);
8955
+ continue ;
8956
+ }
8957
+
9013
8958
LLVMContext &Ctx = ExitIRI->getInstruction ().getContext ();
9014
8959
VPValue *Ext = B.createNaryOp (VPInstruction::ExtractFromEnd,
9015
8960
{V, Plan.getOrAddLiveIn (ConstantInt::get (
@@ -9294,13 +9239,18 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9294
9239
VPlanTransforms::handleUncountableEarlyExit (
9295
9240
*Plan, *PSE.getSE (), OrigLoop, UncountableExitingBlock, RecipeBuilder);
9296
9241
}
9297
- addScalarResumePhis (*Plan, [&RecipeBuilder](PHINode *P) {
9298
- return cast<VPHeaderPHIRecipe>(RecipeBuilder.getRecipe (P));
9299
- });
9300
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks (
9301
- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9242
+ DenseMap<VPValue *, VPValue *> EndValues;
9243
+ addScalarResumePhis (
9244
+ *Plan,
9245
+ [&RecipeBuilder](PHINode *P) {
9246
+ return cast<VPHeaderPHIRecipe>(RecipeBuilder.getRecipe (P));
9247
+ },
9248
+ OrigLoop, EndValues);
9249
+ SetVector<VPIRInstruction *> ExitUsersToFix =
9250
+ collectUsersInExitBlocks (OrigLoop, RecipeBuilder, *Plan);
9302
9251
addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9303
- if (!addUsersInExitBlocks (*Plan, ExitUsersToFix)) {
9252
+ if (!addUsersInExitBlocks (*Plan, ExitUsersToFix, Legal->getInductionVars (),
9253
+ EndValues)) {
9304
9254
reportVectorizationFailure (
9305
9255
" Some exit values in loop with uncountable exit not supported yet" ,
9306
9256
" Some exit values in loop with uncountable exit not supported yet" ,
@@ -9419,6 +9369,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9419
9369
addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW,
9420
9370
DebugLoc ());
9421
9371
9372
+ DenseMap<VPValue *, VPValue *> EndValues;
9422
9373
addScalarResumePhis (
9423
9374
*Plan,
9424
9375
[&Plan](PHINode *P) {
@@ -9428,9 +9379,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9428
9379
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
9429
9380
return HeaderR->getUnderlyingValue () == P ? HeaderR : nullptr ;
9430
9381
});
9431
- }
9432
-
9433
- );
9382
+ },
9383
+ OrigLoop, EndValues);
9434
9384
9435
9385
assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
9436
9386
return Plan;
0 commit comments