Skip to content

Commit c9f4cce

Browse files
committed
[VPlan] Add support for VPWidenIntOrFpInductionRecipe in predicated DataWithEVL vectorization mode.
As an alternative to llvm#82021, this patch lowers VPWidenIntOrFpInductionRecipe into a widened PHI recipe plus step recipes that are computed from the EVL during the EVL transformation phase.
1 parent c6091cd commit c9f4cce

12 files changed

+754
-76
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
115115
/// Identifies if the vector form of the intrinsic has a scalar operand.
116116
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
117117
unsigned ScalarOpdIdx) {
118+
if (VPIntrinsic::isVPIntrinsic(ID) &&
119+
(ScalarOpdIdx == VPIntrinsic::getVectorLengthParamPos(ID)))
120+
return true;
121+
118122
switch (ID) {
119123
case Intrinsic::abs:
120124
case Intrinsic::ctlz:
@@ -127,6 +131,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
127131
case Intrinsic::umul_fix:
128132
case Intrinsic::umul_fix_sat:
129133
return (ScalarOpdIdx == 2);
134+
case Intrinsic::experimental_vp_splat:
135+
return (ScalarOpdIdx == 0);
130136
default:
131137
return false;
132138
}
@@ -148,6 +154,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
148154
return OpdIdx == 0;
149155
case Intrinsic::powi:
150156
return OpdIdx == -1 || OpdIdx == 1;
157+
case Intrinsic::experimental_vp_splat:
158+
return OpdIdx == -1;
151159
default:
152160
return OpdIdx == -1;
153161
}

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,6 +1191,16 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
11911191
return getCmpSelInstrCost(Instruction::Select, ICA.getReturnType(),
11921192
ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE,
11931193
CostKind);
1194+
case Intrinsic::experimental_vp_splat: {
1195+
auto LT = getTypeLegalizationCost(RetTy);
1196+
if (RetTy->getScalarSizeInBits() == 1) {
1197+
return LT.first *
1198+
(1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
1199+
LT.second, CostKind));
1200+
}
1201+
return LT.first *
1202+
getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
1203+
}
11941204
}
11951205

11961206
if (ST->hasVInstructions() && RetTy->isVectorTy()) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2939,8 +2939,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
29392939

29402940
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29412941
// Fix widened non-induction PHIs by setting up the PHI operands.
2942-
if (EnableVPlanNativePath)
2943-
fixNonInductionPHIs(State);
2942+
fixNonInductionPHIs(State);
29442943

29452944
// Forget the original basic block.
29462945
PSE.getSE()->forgetLoop(OrigLoop);

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -285,15 +285,15 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
285285
return Shuf;
286286
};
287287

288-
if (!hasScalarValue(Def, {0})) {
289-
assert(Def->isLiveIn() && "expected a live-in");
290-
Value *IRV = Def->getLiveInIRValue();
291-
Value *B = GetBroadcastInstrs(IRV);
288+
Value *ScalarValue = hasScalarValue(Def, {0}) ? get(Def, VPLane(0)) : nullptr;
289+
if (!ScalarValue || isa<Constant>(ScalarValue)) {
290+
assert((ScalarValue || Def->isLiveIn()) && "expected a live-in");
291+
Value *B = ScalarValue ? GetBroadcastInstrs(ScalarValue)
292+
: GetBroadcastInstrs(Def->getLiveInIRValue());
292293
set(Def, B);
293294
return B;
294295
}
295296

296-
Value *ScalarValue = get(Def, VPLane(0));
297297
// If we aren't vectorizing, we can just copy the scalar map values over
298298
// to the vector map.
299299
if (VF.isScalar()) {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,8 @@ bool VPInstruction::isVectorToScalar() const {
648648
}
649649

650650
bool VPInstruction::isSingleScalar() const {
651-
return getOpcode() == VPInstruction::ResumePhi;
651+
return getOpcode() == VPInstruction::ResumePhi ||
652+
getOpcode() == VPInstruction::ExplicitVectorLength;
652653
}
653654

654655
#if !defined(NDEBUG)
@@ -1022,6 +1023,8 @@ bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
10221023
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
10231024
// Vector predication intrinsics only demand the first lane of the last
10241025
// operand (the EVL operand).
1026+
if (VectorIntrinsicID == Intrinsic::experimental_vp_splat)
1027+
return Op == getOperand(0);
10251028
return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
10261029
Op == getOperand(getNumOperands() - 1);
10271030
}
@@ -2309,9 +2312,8 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
23092312
#endif
23102313

23112314
Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
2312-
assert(vputils::onlyFirstLaneUsed(this) &&
2313-
"Codegen only implemented for first lane.");
23142315
switch (Opcode) {
2316+
case Instruction::UIToFP:
23152317
case Instruction::SExt:
23162318
case Instruction::ZExt:
23172319
case Instruction::Trunc: {
@@ -3414,9 +3416,6 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
34143416
#endif
34153417

34163418
void VPWidenPHIRecipe::execute(VPTransformState &State) {
3417-
assert(EnableVPlanNativePath &&
3418-
"Non-native vplans are not expected to have VPWidenPHIRecipes.");
3419-
34203419
Value *Op0 = State.get(getOperand(0));
34213420
Type *VecTy = Op0->getType();
34223421
Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 131 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1523,6 +1523,126 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
15231523
}
15241524
}
15251525

1526+
/// This function adds (0 * Step, 1 * Step, 2 * Step, ...) to StartValue of
1527+
/// an induction variable at the preheader.
1528+
static VPSingleDefRecipe *createStepVector(VPValue *StartValue, VPValue *Step,
1529+
Type *InductionTy,
1530+
const InductionDescriptor &ID,
1531+
VPBasicBlock *VectorPHVPBB,
1532+
DebugLoc DL) {
1533+
Type *IntTy = InductionTy->isIntegerTy()
1534+
? InductionTy
1535+
: IntegerType::get(InductionTy->getContext(),
1536+
InductionTy->getScalarSizeInBits());
1537+
// Create a vector of consecutive numbers from zero to VF.
1538+
VPSingleDefRecipe *InitVec =
1539+
new VPWidenIntrinsicRecipe(Intrinsic::stepvector, {}, IntTy, DL);
1540+
VectorPHVPBB->appendRecipe(InitVec);
1541+
1542+
if (InductionTy->isIntegerTy()) {
1543+
auto *Mul = new VPInstruction(Instruction::Mul, {InitVec, Step}, DL);
1544+
VectorPHVPBB->appendRecipe(Mul);
1545+
auto *SteppedStart =
1546+
new VPInstruction(Instruction::Add, {StartValue, Mul}, {}, "induction");
1547+
VectorPHVPBB->appendRecipe(SteppedStart);
1548+
return SteppedStart;
1549+
} else {
1550+
FastMathFlags FMF = ID.getInductionBinOp()->getFastMathFlags();
1551+
InitVec = new VPWidenCastRecipe(Instruction::UIToFP, InitVec, InductionTy);
1552+
VectorPHVPBB->appendRecipe(InitVec);
1553+
auto *Mul = new VPInstruction(Instruction::FMul, {InitVec, Step}, FMF, DL);
1554+
VectorPHVPBB->appendRecipe(Mul);
1555+
Instruction::BinaryOps BinOp = ID.getInductionOpcode();
1556+
auto *SteppedStart =
1557+
new VPInstruction(BinOp, {StartValue, Mul}, FMF, DL, "induction");
1558+
VectorPHVPBB->appendRecipe(SteppedStart);
1559+
return SteppedStart;
1560+
}
1561+
}
1562+
1563+
/// Lower widen iv recipes into recipes with EVL.
1564+
static void
1565+
transformWidenIVRecipestoEVLRecipes(VPWidenIntOrFpInductionRecipe *WidenIV,
1566+
VPlan &Plan, VPValue *EVL) {
1567+
DebugLoc DL = WidenIV->getDebugLoc();
1568+
const InductionDescriptor &ID = WidenIV->getInductionDescriptor();
1569+
auto *CanonicalIVIncrement =
1570+
cast<VPInstruction>(Plan.getCanonicalIV()->getBackedgeValue());
1571+
VPBasicBlock *VectorPHVPBB = Plan.getVectorLoopRegion()->getPreheaderVPBB();
1572+
VPBasicBlock *ExitingVPBB =
1573+
Plan.getVectorLoopRegion()->getExitingBasicBlock();
1574+
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
1575+
VPValue *StartValue = WidenIV->getStartValue();
1576+
VPValue *Step = WidenIV->getStepValue();
1577+
if (TruncInst *I = WidenIV->getTruncInst()) {
1578+
Type *TruncTy = I->getType();
1579+
auto *R = new VPScalarCastRecipe(Instruction::Trunc, StartValue, TruncTy);
1580+
VectorPHVPBB->appendRecipe(R);
1581+
StartValue = R;
1582+
R = new VPScalarCastRecipe(Instruction::Trunc, Step, TruncTy);
1583+
VectorPHVPBB->appendRecipe(R);
1584+
Step = R;
1585+
}
1586+
Type *InductionTy = TypeInfo.inferScalarType(StartValue);
1587+
LLVMContext &Ctx = InductionTy->getContext();
1588+
VPValue *TrueMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
1589+
1590+
// Construct the initial value of the vector IV in the vector loop preheader
1591+
VPSingleDefRecipe *SteppedStart =
1592+
createStepVector(StartValue, Step, InductionTy, ID, VectorPHVPBB, DL);
1593+
1594+
// Create the vector phi node for both int. and fp. induction variables
1595+
// and determine the kind of arithmetic we will perform
1596+
auto *VecInd = new VPWidenPHIRecipe(WidenIV->getPHINode());
1597+
VecInd->insertBefore(WidenIV);
1598+
WidenIV->replaceAllUsesWith(VecInd);
1599+
Intrinsic::ID VPArithOp;
1600+
Instruction::BinaryOps MulOp;
1601+
if (InductionTy->isIntegerTy()) {
1602+
VPArithOp = Intrinsic::vp_add;
1603+
MulOp = Instruction::Mul;
1604+
} else {
1605+
VPArithOp = ID.getInductionOpcode() == Instruction::FAdd
1606+
? Intrinsic::vp_fadd
1607+
: Intrinsic::vp_fsub;
1608+
MulOp = Instruction::FMul;
1609+
}
1610+
1611+
// Multiply the runtime VF by the step
1612+
VPSingleDefRecipe *ScalarMul;
1613+
if (InductionTy->isFloatingPointTy()) {
1614+
FastMathFlags FMF = ID.getInductionBinOp()->getFastMathFlags();
1615+
auto *CastEVL =
1616+
new VPScalarCastRecipe(Instruction::UIToFP, EVL, InductionTy);
1617+
CastEVL->insertBefore(CanonicalIVIncrement);
1618+
ScalarMul = new VPInstruction(MulOp, {Step, CastEVL}, FMF, DL);
1619+
} else {
1620+
unsigned InductionSz = InductionTy->getScalarSizeInBits();
1621+
unsigned EVLSz = TypeInfo.inferScalarType(EVL)->getScalarSizeInBits();
1622+
VPValue *CastEVL = EVL;
1623+
if (InductionSz != EVLSz) {
1624+
auto *R = new VPScalarCastRecipe(EVLSz > InductionSz ? Instruction::Trunc
1625+
: Instruction::ZExt,
1626+
EVL, InductionTy);
1627+
R->insertBefore(CanonicalIVIncrement);
1628+
CastEVL = R;
1629+
}
1630+
ScalarMul = new VPInstruction(MulOp, {Step, CastEVL}, DL);
1631+
}
1632+
ScalarMul->insertBefore(CanonicalIVIncrement);
1633+
// Create a vector splat to use in the induction update.
1634+
auto *SplatVF =
1635+
new VPWidenIntrinsicRecipe(Intrinsic::experimental_vp_splat,
1636+
{ScalarMul, TrueMask, EVL}, InductionTy, DL);
1637+
SplatVF->insertBefore(CanonicalIVIncrement);
1638+
// TODO: We may need to add the step a number of times if UF > 1
1639+
auto *LastInduction = new VPWidenIntrinsicRecipe(
1640+
VPArithOp, {VecInd, SplatVF, TrueMask, EVL}, InductionTy, DL);
1641+
LastInduction->insertBefore(CanonicalIVIncrement);
1642+
VecInd->addIncoming(SteppedStart, VectorPHVPBB);
1643+
VecInd->addIncoming(LastInduction, ExitingVPBB);
1644+
}
1645+
15261646
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
15271647
/// replaces all uses except the canonical IV increment of
15281648
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1569,8 +1689,7 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
15691689
// of the VF directly. At the moment, widened pointer inductions cannot be updated, so
15701690
// bail out if the plan contains any.
15711691
bool ContainsWidenInductions = any_of(Header->phis(), [](VPRecipeBase &Phi) {
1572-
return isa<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>(
1573-
&Phi);
1692+
return isa<VPWidenPointerInductionRecipe>(&Phi);
15741693
});
15751694
if (ContainsWidenInductions)
15761695
return false;
@@ -1615,6 +1734,16 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
16151734

16161735
transformRecipestoEVLRecipes(Plan, *VPEVL);
16171736

1737+
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1738+
SmallVector<VPRecipeBase *> ToRemove;
1739+
for (VPRecipeBase &Phi : HeaderVPBB->phis())
1740+
if (auto *WidenIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi)) {
1741+
transformWidenIVRecipestoEVLRecipes(WidenIV, Plan, VPEVL);
1742+
ToRemove.push_back(WidenIV);
1743+
}
1744+
for (VPRecipeBase *R : ToRemove)
1745+
R->eraseFromParent();
1746+
16181747
// Replace all uses of VPCanonicalIVPHIRecipe by
16191748
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
16201749
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
156156
.Case<VPScalarCastRecipe>(
157157
[&](const VPScalarCastRecipe *S) { return true; })
158158
.Case<VPInstruction>([&](const VPInstruction *I) {
159-
if (I->getOpcode() != Instruction::Add) {
159+
if ((I->getOpcode() != Instruction::Add) &&
160+
(I->getOpcode() != Instruction::Mul)) {
160161
errs()
161162
<< "EVL is used as an operand in non-VPInstruction::Add\n";
162163
return false;
@@ -166,11 +167,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
166167
"users\n";
167168
return false;
168169
}
169-
if (!isa<VPEVLBasedIVPHIRecipe>(*I->users().begin())) {
170-
errs() << "Result of VPInstruction::Add with EVL operand is "
171-
"not used by VPEVLBasedIVPHIRecipe\n";
172-
return false;
173-
}
174170
return true;
175171
})
176172
.Default([&](const VPUser *U) {

0 commit comments

Comments
 (0)