Commit 58326f1

Revert "[VPlan] Update final IV exit value via VPlan. (#112147)"
This reverts commit c2d15ac. Causes build failures on PPC stage2 & fuchsia bots:
https://lab.llvm.org/buildbot/#/builders/168/builds/7650
https://lab.llvm.org/buildbot/#/builders/11/builds/11248
1 parent c2d15ac; commit 58326f1

10 files changed: 216 additions, 288 deletions

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 193 additions & 21 deletions
@@ -543,6 +543,11 @@ class InnerLoopVectorizer {
 protected:
   friend class LoopVectorizationPlanner;
 
+  /// Set up the values of the IVs correctly when exiting the vector loop.
+  virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
+                            Value *VectorTripCount, BasicBlock *MiddleBlock,
+                            VPTransformState &State);
+
   /// Iteratively sink the scalarized operands of a predicated instruction into
   /// the block that was created for it.
   void sinkScalarOperands(Instruction *PredInst);
@@ -780,6 +785,10 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
   BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
   void printDebugTracesAtStart() override;
   void printDebugTracesAtEnd() override;
+
+  void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
+                    Value *VectorTripCount, BasicBlock *MiddleBlock,
+                    VPTransformState &State) override {};
 };
 
 // A specialized derived class of inner loop vectorizer that performs
@@ -2773,6 +2782,97 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
   return LoopVectorPreHeader;
 }
 
+// Fix up external users of the induction variable. At this point, we are
+// in LCSSA form, with all external PHIs that use the IV having one input value,
+// coming from the remainder loop. We need those PHIs to also have a correct
+// value for the IV when arriving directly from the middle block.
+void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
+                                       const InductionDescriptor &II,
+                                       Value *VectorTripCount,
+                                       BasicBlock *MiddleBlock,
+                                       VPTransformState &State) {
+  // There are two kinds of external IV usages - those that use the value
+  // computed in the last iteration (the PHI) and those that use the penultimate
+  // value (the value that feeds into the phi from the loop latch).
+  // We allow both, but they, obviously, have different values.
+
+  DenseMap<Value *, Value *> MissingVals;
+
+  Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
+                                      OrigLoop->getLoopPreheader()))
+                        ->getIncomingValueForBlock(MiddleBlock);
+
+  // An external user of the last iteration's value should see the value that
+  // the remainder loop uses to initialize its own IV.
+  Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
+  for (User *U : PostInc->users()) {
+    Instruction *UI = cast<Instruction>(U);
+    if (!OrigLoop->contains(UI)) {
+      assert(isa<PHINode>(UI) && "Expected LCSSA form");
+      MissingVals[UI] = EndValue;
+    }
+  }
+
+  // An external user of the penultimate value need to see EndValue - Step.
+  // The simplest way to get this is to recompute it from the constituent SCEVs,
+  // that is Start + (Step * (CRD - 1)).
+  for (User *U : OrigPhi->users()) {
+    auto *UI = cast<Instruction>(U);
+    if (!OrigLoop->contains(UI)) {
+      assert(isa<PHINode>(UI) && "Expected LCSSA form");
+      IRBuilder<> B(MiddleBlock->getTerminator());
+
+      // Fast-math-flags propagate from the original induction instruction.
+      if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
+        B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
+
+      VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
+      assert(StepVPV && "step must have been expanded during VPlan execution");
+      Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
+                                        : State.get(StepVPV, VPLane(0));
+      Value *Escape = nullptr;
+      if (EndValue->getType()->isIntegerTy())
+        Escape = B.CreateSub(EndValue, Step);
+      else if (EndValue->getType()->isPointerTy())
+        Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
+      else {
+        assert(EndValue->getType()->isFloatingPointTy() &&
+               "Unexpected induction type");
+        Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
+                                       Instruction::FAdd
+                                   ? Instruction::FSub
+                                   : Instruction::FAdd,
+                               EndValue, Step);
+      }
+      Escape->setName("ind.escape");
+      MissingVals[UI] = Escape;
+    }
+  }
+
+  assert((MissingVals.empty() ||
+          all_of(MissingVals,
+                 [MiddleBlock, this](const std::pair<Value *, Value *> &P) {
+                   return all_of(
+                       predecessors(cast<Instruction>(P.first)->getParent()),
+                       [MiddleBlock, this](BasicBlock *Pred) {
+                         return Pred == MiddleBlock ||
+                                Pred == OrigLoop->getLoopLatch();
+                       });
+                 })) &&
+         "Expected escaping values from latch/middle.block only");
+
+  for (auto &I : MissingVals) {
+    PHINode *PHI = cast<PHINode>(I.first);
+    // One corner case we have to handle is two IVs "chasing" each-other,
+    // that is %IV2 = phi [...], [ %IV1, %latch ]
+    // In this case, if IV1 has an external use, we need to avoid adding both
+    // "last value of IV1" and "penultimate value of IV2". So, verify that we
+    // don't already have an incoming value for the middle block.
+    if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
+      PHI->addIncoming(I.second, MiddleBlock);
+  }
+}
+
 namespace {
 
 struct CSEDenseMapInfo {
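
For orientation, here is a minimal sketch (hypothetical IR, not part of this commit) of the two escaping-use shapes the restored fixupIVUsers distinguishes: an exit phi fed by the post-increment (the last value, which receives EndValue) and an exit phi fed by the header phi itself (the penultimate value, which receives the recomputed EndValue - Step):

    loop:                                                ; original scalar loop
      %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
      ; ... loop body ...
      %iv.next = add i64 %iv, 1                          ; PostInc
      %ec = icmp eq i64 %iv.next, %n
      br i1 %ec, label %exit, label %loop

    exit:
      %last = phi i64 [ %iv.next, %loop ]                ; user of the last value
      %penultimate = phi i64 [ %iv, %loop ]              ; user of the penultimate value ("ind.escape")
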
@@ -2899,6 +2999,24 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State)
   for (PHINode &PN : Exit->phis())
     PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
 
+  if (Cost->requiresScalarEpilogue(VF.isVector())) {
+    // No edge from the middle block to the unique exit block has been inserted
+    // and there is nothing to fix from vector loop; phis should have incoming
+    // from scalar loop only.
+  } else {
+    // TODO: Check in VPlan to see if IV users need fixing instead of checking
+    // the cost model.
+
+    // If we inserted an edge from the middle block to the unique exit block,
+    // update uses outside the loop (phis) to account for the newly inserted
+    // edge.
+
+    // Fix-up external users of the induction variables.
+    for (const auto &Entry : Legal->getInductionVars())
+      fixupIVUsers(Entry.first, Entry.second,
+                   getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
+  }
+
   // Don't apply optimizations below when no vector region remains, as they all
   // require a vector loop at the moment.
   if (!State.Plan->getVectorLoopRegion())
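
As a sketch of the effect (again hypothetical IR, not from this commit, assuming the usual middle.block/scalar.ph layout and using %n.vec as a stand-in for the vector trip count), the fixup gives each such exit phi an extra incoming value for the edge from the middle block, so the phi is correct whether the scalar remainder loop runs or is skipped:

    middle.block:
      %cmp.n = icmp eq i64 %n, %n.vec
      br i1 %cmp.n, label %exit, label %scalar.ph

    exit:
      ; before the fixup: only [ %iv.next, %loop ]
      %last = phi i64 [ %iv.next, %loop ], [ %n.vec, %middle.block ]
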
@@ -8931,9 +9049,11 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
 /// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
 /// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
 /// the end value of the induction.
-static VPInstruction *addResumePhiRecipeForInduction(
-    VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
-    VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) {
+static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
+                                               VPBuilder &VectorPHBuilder,
+                                               VPBuilder &ScalarPHBuilder,
+                                               VPTypeAnalysis &TypeInfo,
+                                               VPValue *VectorTC) {
   auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
   // Truncated wide inductions resume from the last lane of their vector value
   // in the last vector iteration which is handled elsewhere.
@@ -8967,10 +9087,8 @@ static VPInstruction *addResumePhiRecipeForInduction(
 
 /// Create resume phis in the scalar preheader for first-order recurrences,
 /// reductions and inductions, and update the VPIRInstructions wrapping the
-/// original phis in the scalar header. End values for inductions are added to
-/// \p IVEndValues.
-static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
-                                DenseMap<VPValue *, VPValue *> &IVEndValues) {
+/// original phis in the scalar header.
+static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
   VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
   auto *ScalarPH = Plan.getScalarPreheader();
   auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -8987,16 +9105,11 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
     if (!ScalarPhiI)
       break;
 
-    // TODO: Extract final value from induction recipe initially, optimize to
-    // pre-computed end value together in optimizeInductionExitUsers.
     auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder.getRecipe(ScalarPhiI));
     if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
-      if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
+      if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
               WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
               &Plan.getVectorTripCount())) {
-        assert(ResumePhi->getOpcode() == VPInstruction::ResumePhi &&
-               "Expected a ResumePhi");
-        IVEndValues[WideIVR] = ResumePhi->getOperand(0);
         ScalarPhiIRI->addOperand(ResumePhi);
         continue;
       }
@@ -9027,6 +9140,65 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
   }
 }
 
+/// Return true if \p VPV is an optimizable IV or IV use. That is, if \p VPV is
+/// either an untruncated wide induction, or if it increments a wide induction
+/// by its step.
+static bool isOptimizableIVOrUse(VPValue *VPV) {
+  VPRecipeBase *Def = VPV->getDefiningRecipe();
+  if (!Def)
+    return false;
+  auto *WideIV = dyn_cast<VPWidenInductionRecipe>(Def);
+  if (WideIV) {
+    // VPV itself is a wide induction, separately compute the end value for exit
+    // users if it is not a truncated IV.
+    return isa<VPWidenPointerInductionRecipe>(WideIV) ||
+           !cast<VPWidenIntOrFpInductionRecipe>(WideIV)->getTruncInst();
+  }
+
+  // Check if VPV is an optimizable induction increment.
+  if (Def->getNumOperands() != 2)
+    return false;
+  WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(0));
+  if (!WideIV)
+    WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(1));
+  if (!WideIV)
+    return false;
+
+  using namespace VPlanPatternMatch;
+  auto &ID = WideIV->getInductionDescriptor();
+
+  // Check if VPV increments the induction by the induction step.
+  VPValue *IVStep = WideIV->getStepValue();
+  switch (ID.getInductionOpcode()) {
+  case Instruction::Add:
+    return match(VPV, m_c_Binary<Instruction::Add>(m_Specific(WideIV),
+                                                   m_Specific(IVStep)));
+  case Instruction::FAdd:
+    return match(VPV, m_c_Binary<Instruction::FAdd>(m_Specific(WideIV),
+                                                    m_Specific(IVStep)));
+  case Instruction::FSub:
+    return match(VPV, m_Binary<Instruction::FSub>(m_Specific(WideIV),
+                                                  m_Specific(IVStep)));
+  case Instruction::Sub: {
+    // IVStep will be the negated step of the subtraction. Check if Step == -1 *
+    // IVStep.
+    VPValue *Step;
+    if (!match(VPV, m_Binary<Instruction::Sub>(m_VPValue(), m_VPValue(Step))) ||
+        !Step->isLiveIn() || !IVStep->isLiveIn())
+      return false;
+    auto *StepCI = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
+    auto *IVStepCI = dyn_cast<ConstantInt>(IVStep->getLiveInIRValue());
+    return StepCI && IVStepCI &&
+           StepCI->getValue() == (-1 * IVStepCI->getValue());
+  }
+  default:
+    return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
+           match(VPV, m_GetElementPtr(m_Specific(WideIV),
+                                      m_Specific(WideIV->getStepValue())));
+  }
+  llvm_unreachable("should have been covered by switch above");
+}
+
 // Collect VPIRInstructions for phis in the exit blocks that are modeled
 // in VPlan and add the exiting VPValue as operand. Some exiting values are not
 // modeled explicitly yet and won't be included. Those are un-truncated
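
To illustrate the shapes this predicate accepts (hypothetical IR, not from this commit; %step and %p are placeholder values), the IV itself qualifies when it is not truncated, and an IV use qualifies when it adds the induction step, subtracts its negation, or advances a pointer IV by the step:

    %iv       = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
    %ptr.iv   = phi ptr [ %p, %ph ], [ %ptr.next, %loop ]
    %iv.next  = add i64 %iv, %step       ; Add case: increments the IV by its step
    %dec      = sub i64 %iv, 4           ; Sub case: accepted when the IV step is -4 (Step == -1 * IVStep)
    %ptr.next = getelementptr i8, ptr %ptr.iv, i64 %step   ; pointer-IV GEP case
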
@@ -9056,6 +9228,12 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
       }
       Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
       VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
+      // Exit values for inductions are computed and updated outside of VPlan
+      // and independent of induction recipes.
+      // TODO: Compute induction exit values in VPlan.
+      if (isOptimizableIVOrUse(V) &&
+          ExitVPBB->getSinglePredecessor() == MiddleVPBB)
+        continue;
       ExitUsersToFix.insert(ExitIRI);
       ExitIRI->addOperand(V);
     }
@@ -9075,7 +9253,6 @@ addUsersInExitBlocks(VPlan &Plan,
 
   auto *MiddleVPBB = Plan.getMiddleBlock();
   VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
-  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
 
   // Introduce extract for exiting values and update the VPIRInstructions
   // modeling the corresponding LCSSA phis.
@@ -9397,8 +9574,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
     VPlanTransforms::handleUncountableEarlyExit(
         *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
   }
-  DenseMap<VPValue *, VPValue *> IVEndValues;
-  addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
+  addScalarResumePhis(RecipeBuilder, *Plan);
   SetVector<VPIRInstruction *> ExitUsersToFix =
       collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
   addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
@@ -9481,7 +9657,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
     VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow,
                                        WithoutRuntimeCheck);
   }
-  VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues);
 
   assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
   return Plan;
@@ -9533,10 +9708,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
     auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
     RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
   }
-  DenseMap<VPValue *, VPValue *> IVEndValues;
-  // TODO: IVEndValues are not used yet in the native path, to optimize exit
-  // values.
-  addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
+  addScalarResumePhis(RecipeBuilder, *Plan);
 
   assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
   return Plan;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 0 additions & 6 deletions
@@ -1422,12 +1422,6 @@ class VPIRInstruction : public VPRecipeBase {
            "Op must be an operand of the recipe");
     return true;
   }
-
-  bool onlyFirstLaneUsed(const VPValue *Op) const override {
-    assert(is_contained(operands(), Op) &&
-           "Op must be an operand of the recipe");
-    return true;
-  }
 };
 
 /// VPWidenRecipe is a recipe for producing a widened instruction using the
