Skip to content

Commit 009b77f

Browse files
committed
[VPlan] Update final exit value via VPlan.
Model updating IV users directly in VPlan, replacing fixupIVUsers. Depends on llvm#110004, llvm#109975 and llvm#112145.
1 parent 09dcbf7 commit 009b77f

File tree

3 files changed

+67
-118
lines changed

3 files changed

+67
-118
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 52 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -779,10 +779,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
779779
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
780780
void printDebugTracesAtStart() override;
781781
void printDebugTracesAtEnd() override;
782-
783-
void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
784-
Value *VectorTripCount, BasicBlock *MiddleBlock,
785-
VPlan &Plan, VPTransformState &State) override {};
786782
};
787783

788784
// A specialized derived class of inner loop vectorizer that performs
@@ -2697,87 +2693,6 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
26972693
return {LoopVectorPreHeader, nullptr};
26982694
}
26992695

2700-
// Fix up external users of the induction variable. At this point, we are
2701-
// in LCSSA form, with all external PHIs that use the IV having one input value,
2702-
// coming from the remainder loop. We need those PHIs to also have a correct
2703-
// value for the IV when arriving directly from the middle block.
2704-
void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2705-
const InductionDescriptor &II,
2706-
Value *VectorTripCount,
2707-
BasicBlock *MiddleBlock, VPlan &Plan,
2708-
VPTransformState &State) {
2709-
// There are two kinds of external IV usages - those that use the value
2710-
// computed in the last iteration (the PHI) and those that use the penultimate
2711-
// value (the value that feeds into the phi from the loop latch).
2712-
// We allow both, but they, obviously, have different values.
2713-
2714-
assert(OrigLoop->getUniqueExitBlock() && "Expected a single exit block");
2715-
2716-
DenseMap<Value *, Value *> MissingVals;
2717-
2718-
Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
2719-
OrigLoop->getLoopPreheader()))
2720-
->getIncomingValueForBlock(MiddleBlock);
2721-
2722-
// An external user of the last iteration's value should see the value that
2723-
// the remainder loop uses to initialize its own IV.
2724-
Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
2725-
for (User *U : PostInc->users()) {
2726-
Instruction *UI = cast<Instruction>(U);
2727-
if (!OrigLoop->contains(UI)) {
2728-
assert(isa<PHINode>(UI) && "Expected LCSSA form");
2729-
MissingVals[UI] = EndValue;
2730-
}
2731-
}
2732-
2733-
// An external user of the penultimate value needs to see EndValue - Step.
2734-
// The simplest way to get this is to recompute it from the constituent SCEVs,
2735-
// that is Start + (Step * (CRD - 1)).
2736-
for (User *U : OrigPhi->users()) {
2737-
auto *UI = cast<Instruction>(U);
2738-
if (!OrigLoop->contains(UI)) {
2739-
assert(isa<PHINode>(UI) && "Expected LCSSA form");
2740-
IRBuilder<> B(MiddleBlock->getTerminator());
2741-
2742-
// Fast-math-flags propagate from the original induction instruction.
2743-
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
2744-
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
2745-
2746-
VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
2747-
assert(StepVPV && "step must have been expanded during VPlan execution");
2748-
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
2749-
: State.get(StepVPV, VPLane(0));
2750-
Value *Escape = nullptr;
2751-
if (EndValue->getType()->isIntegerTy())
2752-
Escape = B.CreateSub(EndValue, Step);
2753-
else if (EndValue->getType()->isPointerTy())
2754-
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
2755-
else if (EndValue->getType()->isFloatingPointTy()) {
2756-
Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
2757-
Instruction::FAdd
2758-
? Instruction::FSub
2759-
: Instruction::FAdd,
2760-
EndValue, Step);
2761-
} else {
2762-
llvm_unreachable("all possible induction types must be handled");
2763-
}
2764-
Escape->setName("ind.escape");
2765-
MissingVals[UI] = Escape;
2766-
}
2767-
}
2768-
2769-
for (auto &I : MissingVals) {
2770-
PHINode *PHI = cast<PHINode>(I.first);
2771-
// One corner case we have to handle is two IVs "chasing" each-other,
2772-
// that is %IV2 = phi [...], [ %IV1, %latch ]
2773-
// In this case, if IV1 has an external use, we need to avoid adding both
2774-
// "last value of IV1" and "penultimate value of IV2". So, verify that we
2775-
// don't already have an incoming value for the middle block.
2776-
if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
2777-
PHI->addIncoming(I.second, MiddleBlock);
2778-
}
2779-
}
2780-
27812696
namespace {
27822697

27832698
struct CSEDenseMapInfo {
@@ -2907,25 +2822,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
29072822
for (PHINode &PN : Exit->phis())
29082823
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
29092824

2910-
if (Cost->requiresScalarEpilogue(VF.isVector())) {
2911-
// No edge from the middle block to the unique exit block has been inserted
2912-
// and there is nothing to fix from vector loop; phis should have incoming
2913-
// from scalar loop only.
2914-
} else {
2915-
// TODO: Check VPLiveOuts to see if IV users need fixing instead of checking
2916-
// the cost model.
2917-
2918-
// If we inserted an edge from the middle block to the unique exit block,
2919-
// update uses outside the loop (phis) to account for the newly inserted
2920-
// edge.
2921-
2922-
// Fix-up external users of the induction variables.
2923-
for (const auto &Entry : Legal->getInductionVars())
2924-
fixupIVUsers(Entry.first, Entry.second,
2925-
getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, Plan,
2926-
State);
2927-
}
2928-
29292825
for (Instruction *PI : PredicatedInstructions)
29302826
sinkScalarOperands(&*PI);
29312827

@@ -8821,7 +8717,7 @@ addUsersInExitBlock(VPlan &Plan,
88218717
}
88228718
}
88238719

8824-
static void addResumeValuesForInductions(VPlan &Plan) {
8720+
static void addResumeValuesForInductions(VPlan &Plan, Loop *OrigLoop) {
88258721
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
88268722
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
88278723

@@ -8870,9 +8766,11 @@ static void addResumeValuesForInductions(VPlan &Plan) {
88708766
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
88718767

88728768
VPBasicBlock *ScalarPHVPBB = nullptr;
8769+
VPBasicBlock *ExitVPBB = nullptr;
88738770
if (MiddleVPBB->getNumSuccessors() == 2) {
88748771
// Order is strict: first is the exit block, second is the scalar
88758772
// preheader.
8773+
ExitVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[0]);
88768774
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
88778775
} else {
88788776
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
@@ -8886,6 +8784,53 @@ static void addResumeValuesForInductions(VPlan &Plan) {
88868784
auto *ScalarLoopHeader =
88878785
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
88888786
addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
8787+
8788+
if (ExitVPBB) {
8789+
8790+
Value *PostInc =
8791+
OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
8792+
for (auto &R : *ExitVPBB) {
8793+
auto *VPIRInst = cast<VPIRInstruction>(&R);
8794+
auto *IRI = &VPIRInst->getInstruction();
8795+
if (!isa<PHINode>(IRI))
8796+
break;
8797+
// Skip phi nodes already updated. This can be the case if 2 induction
8798+
// phis chase each other.
8799+
if (VPIRInst->getNumOperands() == 1)
8800+
continue;
8801+
if (any_of(IRI->operands(),
8802+
[PostInc](Value *Op) { return Op == PostInc; })) {
8803+
VPIRInst->addOperand(EndValue);
8804+
continue;
8805+
}
8806+
8807+
if (any_of(IRI->operands(),
8808+
[OrigPhi](Value *Op) { return Op == OrigPhi; })) {
8809+
VPBuilder B(MiddleVPBB->getTerminator());
8810+
VPValue *Escape = nullptr;
8811+
if (ScalarTy->isIntegerTy())
8812+
Escape = B.createNaryOp(Instruction::Sub, {EndValue, Step});
8813+
else if (ScalarTy->isPointerTy())
8814+
Escape = B.createPtrAdd(
8815+
EndValue,
8816+
B.createNaryOp(
8817+
Instruction::Xor,
8818+
{Step, Plan.getOrAddLiveIn(ConstantInt::get(
8819+
Step->getLiveInIRValue()->getType(), -1))}));
8820+
else if (ScalarTy->isFloatingPointTy()) {
8821+
Escape = B.createNaryOp(
8822+
ID->getInductionBinOp()->getOpcode() == Instruction::FAdd
8823+
? Instruction::FSub
8824+
: Instruction::FAdd,
8825+
{EndValue, Step},
8826+
{ID->getInductionBinOp()->getFastMathFlags()});
8827+
} else {
8828+
llvm_unreachable("all possible induction types must be handled");
8829+
}
8830+
VPIRInst->addOperand(Escape);
8831+
}
8832+
}
8833+
}
88898834
}
88908835
}
88918836

@@ -9199,7 +9144,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91999144
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
92009145
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
92019146
addUsersInExitBlock(*Plan, ExitUsersToFix);
9202-
addResumeValuesForInductions(*Plan);
9147+
addResumeValuesForInductions(*Plan, OrigLoop);
92039148

92049149
// ---------------------------------------------------------------------------
92059150
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9305,7 +9250,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
93059250
bool HasNUW = true;
93069251
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
93079252
DebugLoc());
9308-
addResumeValuesForInductions(*Plan);
9253+
addResumeValuesForInductions(*Plan, OrigLoop);
93099254
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
93109255
return Plan;
93119256
}

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -319,16 +319,20 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
319319
LastLane = 0;
320320
}
321321

322-
auto *LastInst = cast<Instruction>(get(Def, LastLane));
323-
// Set the insert point after the last scalarized instruction or after the
324-
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
325-
// will directly follow the scalar definitions.
322+
auto *LastDef = get(Def, LastLane);
326323
auto OldIP = Builder.saveIP();
327-
auto NewIP =
328-
isa<PHINode>(LastInst)
329-
? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
330-
: std::next(BasicBlock::iterator(LastInst));
331-
Builder.SetInsertPoint(&*NewIP);
324+
if (auto *LastInst = dyn_cast<Instruction>(LastDef)) {
325+
// TODO: Remove once VPDerivedIVRecipe can be simplified, which requires
326+
// vector trip count being modeled in VPlan.
327+
// Set the insert point after the last scalarized instruction or after the
328+
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
329+
// will directly follow the scalar definitions.
330+
auto NewIP =
331+
isa<PHINode>(LastInst)
332+
? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
333+
: std::next(BasicBlock::iterator(LastInst));
334+
Builder.SetInsertPoint(&*NewIP);
335+
}
332336

333337
// However, if we are vectorizing, we need to construct the vector values.
334338
// If the value is known to be uniform after vectorization, we can just
@@ -343,7 +347,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
343347
} else {
344348
// Initialize packing with insertelements to start from undef.
345349
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
346-
Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
350+
Value *Undef = PoisonValue::get(VectorType::get(LastDef->getType(), VF));
347351
set(Def, Undef);
348352
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
349353
packScalarIntoVectorValue(Def, Lane);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -830,7 +830,7 @@ void VPIRInstruction::execute(VPTransformState &State) {
830830
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
831831
// Set insertion point in PredBB in case an extract needs to be generated.
832832
// TODO: Model extracts explicitly.
833-
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
833+
State.Builder.SetInsertPoint(&*PredBB->getTerminator());
834834
Value *V = State.get(ExitValue, VPLane(Lane));
835835
auto *Phi = cast<PHINode>(&I);
836836
if (Phi->getBasicBlockIndex(PredBB) == -1)

0 commit comments

Comments
 (0)