Skip to content

Commit b916b88

Browse files
fhahn authored and var-const committed
[VPlan] Add opcode to create step for wide inductions. (llvm#119284)
This patch adds a WideIVStep opcode that can be used to create a vector with the steps to increment a wide induction. The opcode has 2 operands: (1) the vector step, and (2) the scale of the vector step. The opcode is later converted into a sequence of recipes that convert the scale and step to the target type, if needed, and then multiply the vector step by the scale. This simplifies code that needs to materialize step vectors, e.g. replacing wide IVs, as a follow-up to llvm#108378, with an increment of the wide IV step. PR: llvm#119284
1 parent 88a69d7 commit b916b88

File tree

7 files changed

+118
-45
lines changed

7 files changed

+118
-45
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,14 @@ class VPBuilder {
172172
new VPInstruction(Opcode, Operands, *FMFs, DL, Name));
173173
return createInstruction(Opcode, Operands, DL, Name);
174174
}
175+
VPInstruction *createNaryOp(unsigned Opcode,
176+
std::initializer_list<VPValue *> Operands,
177+
Type *ResultTy,
178+
std::optional<FastMathFlags> FMFs = {},
179+
DebugLoc DL = {}, const Twine &Name = "") {
180+
return tryInsertInstruction(new VPInstructionWithType(
181+
Opcode, Operands, ResultTy, FMFs.value_or(FastMathFlags()), DL, Name));
182+
}
175183

176184
VPInstruction *createOverflowingOp(unsigned Opcode,
177185
std::initializer_list<VPValue *> Operands,

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7934,7 +7934,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
79347934
BestVPlan, BestVF,
79357935
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
79367936
VPlanTransforms::removeDeadRecipes(BestVPlan);
7937-
VPlanTransforms::convertToConcreteRecipes(BestVPlan);
7937+
VPlanTransforms::convertToConcreteRecipes(BestVPlan,
7938+
*Legal->getWidestInductionType());
79387939

79397940
// Perform the actual loop transformation.
79407941
VPTransformState State(&TTI, BestVF, LI, DT, ILV.Builder, &ILV, &BestVPlan,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,13 @@ class VPInstruction : public VPRecipeWithIRFlags,
884884
AnyOf,
885885
// Calculates the first active lane index of the vector predicate operand.
886886
FirstActiveLane,
887+
888+
// The opcodes below are used for VPInstructionWithType.
889+
//
890+
/// Scale the first operand (vector step) by the second operand
891+
/// (scalar-step). Casts both operands to the result type if needed.
892+
WideIVStep,
893+
887894
};
888895

889896
private:
@@ -1041,11 +1048,19 @@ class VPInstructionWithType : public VPInstruction {
10411048
VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
10421049
Type *ResultTy, DebugLoc DL, const Twine &Name = "")
10431050
: VPInstruction(Opcode, Operands, DL, Name), ResultTy(ResultTy) {}
1051+
VPInstructionWithType(unsigned Opcode,
1052+
std::initializer_list<VPValue *> Operands,
1053+
Type *ResultTy, FastMathFlags FMFs, DebugLoc DL = {},
1054+
const Twine &Name = "")
1055+
: VPInstruction(Opcode, Operands, FMFs, DL, Name), ResultTy(ResultTy) {}
10441056

10451057
static inline bool classof(const VPRecipeBase *R) {
10461058
// VPInstructionWithType are VPInstructions with specific opcodes requiring
10471059
// type information.
1048-
return R->isScalarCast();
1060+
if (R->isScalarCast())
1061+
return true;
1062+
auto *VPI = dyn_cast<VPInstruction>(R);
1063+
return VPI && VPI->getOpcode() == VPInstruction::WideIVStep;
10491064
}
10501065

10511066
static inline bool classof(const VPUser *R) {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -873,7 +873,8 @@ bool VPInstruction::isFPMathOp() const {
873873
return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
874874
Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
875875
Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
876-
Opcode == Instruction::FCmp || Opcode == Instruction::Select;
876+
Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
877+
Opcode == VPInstruction::WideIVStep;
877878
}
878879
#endif
879880

@@ -928,6 +929,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
928929
case VPInstruction::LogicalAnd:
929930
case VPInstruction::Not:
930931
case VPInstruction::PtrAdd:
932+
case VPInstruction::WideIVStep:
931933
return false;
932934
default:
933935
return true;
@@ -1097,9 +1099,19 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
10971099
VPSlotTracker &SlotTracker) const {
10981100
O << Indent << "EMIT ";
10991101
printAsOperand(O, SlotTracker);
1100-
O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
1101-
printOperands(O, SlotTracker);
1102-
O << " to " << *ResultTy;
1102+
O << " = ";
1103+
1104+
switch (getOpcode()) {
1105+
case VPInstruction::WideIVStep:
1106+
O << "wide-iv-step ";
1107+
printOperands(O, SlotTracker);
1108+
break;
1109+
default:
1110+
assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
1111+
O << Instruction::getOpcodeName(getOpcode()) << " ";
1112+
printOperands(O, SlotTracker);
1113+
O << " to " << *ResultTy;
1114+
}
11031115
}
11041116
#endif
11051117

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 68 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,6 +1019,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
10191019
TypeInfo.inferScalarType(R.getOperand(1)) ==
10201020
TypeInfo.inferScalarType(R.getVPSingleValue()))
10211021
return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
1022+
1023+
if (match(&R, m_VPInstruction<VPInstruction::WideIVStep>(m_VPValue(X),
1024+
m_SpecificInt(1)))) {
1025+
Type *WideStepTy = TypeInfo.inferScalarType(R.getVPSingleValue());
1026+
if (TypeInfo.inferScalarType(X) != WideStepTy)
1027+
X = VPBuilder(&R).createWidenCast(Instruction::Trunc, X, WideStepTy);
1028+
R.getVPSingleValue()->replaceAllUsesWith(X);
1029+
}
10221030
}
10231031

10241032
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
@@ -2367,23 +2375,71 @@ void VPlanTransforms::createInterleaveGroups(
23672375
}
23682376
}
23692377

2370-
void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
2378+
void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
2379+
Type &CanonicalIVTy) {
2380+
using namespace llvm::VPlanPatternMatch;
2381+
2382+
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
2383+
SmallVector<VPRecipeBase *> ToRemove;
23712384
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
23722385
vp_depth_first_deep(Plan.getEntry()))) {
2373-
for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {
2374-
if (!isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(&R))
2386+
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
2387+
if (isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(&R)) {
2388+
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
2389+
StringRef Name =
2390+
isa<VPCanonicalIVPHIRecipe>(PhiR) ? "index" : "evl.based.iv";
2391+
auto *ScalarR = new VPInstruction(
2392+
Instruction::PHI, {PhiR->getStartValue(), PhiR->getBackedgeValue()},
2393+
PhiR->getDebugLoc(), Name);
2394+
ScalarR->insertBefore(PhiR);
2395+
PhiR->replaceAllUsesWith(ScalarR);
2396+
ToRemove.push_back(PhiR);
23752397
continue;
2376-
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
2377-
StringRef Name =
2378-
isa<VPCanonicalIVPHIRecipe>(PhiR) ? "index" : "evl.based.iv";
2379-
auto *ScalarR = new VPInstruction(
2380-
Instruction::PHI, {PhiR->getStartValue(), PhiR->getBackedgeValue()},
2381-
PhiR->getDebugLoc(), Name);
2382-
ScalarR->insertBefore(PhiR);
2383-
PhiR->replaceAllUsesWith(ScalarR);
2384-
PhiR->eraseFromParent();
2398+
}
2399+
2400+
VPValue *VectorStep;
2401+
VPValue *ScalarStep;
2402+
if (!match(&R, m_VPInstruction<VPInstruction::WideIVStep>(
2403+
m_VPValue(VectorStep), m_VPValue(ScalarStep))))
2404+
continue;
2405+
2406+
// Expand WideIVStep.
2407+
auto *VPI = cast<VPInstruction>(&R);
2408+
VPBuilder Builder(VPI);
2409+
Type *IVTy = TypeInfo.inferScalarType(VPI);
2410+
if (TypeInfo.inferScalarType(VectorStep) != IVTy) {
2411+
Instruction::CastOps CastOp = IVTy->isFloatingPointTy()
2412+
? Instruction::UIToFP
2413+
: Instruction::Trunc;
2414+
VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
2415+
}
2416+
2417+
auto *ConstStep =
2418+
ScalarStep->isLiveIn()
2419+
? dyn_cast<ConstantInt>(ScalarStep->getLiveInIRValue())
2420+
: nullptr;
2421+
assert(!ConstStep || ConstStep->getValue() != 1);
2422+
if (TypeInfo.inferScalarType(ScalarStep) != IVTy) {
2423+
ScalarStep =
2424+
Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
2425+
}
2426+
2427+
std::optional<FastMathFlags> FMFs;
2428+
if (IVTy->isFloatingPointTy())
2429+
FMFs = VPI->getFastMathFlags();
2430+
2431+
unsigned MulOpc =
2432+
IVTy->isFloatingPointTy() ? Instruction::FMul : Instruction::Mul;
2433+
VPInstruction *Mul = Builder.createNaryOp(
2434+
MulOpc, {VectorStep, ScalarStep}, FMFs, R.getDebugLoc());
2435+
VectorStep = Mul;
2436+
VPI->replaceAllUsesWith(VectorStep);
2437+
ToRemove.push_back(VPI);
23852438
}
23862439
}
2440+
2441+
for (VPRecipeBase *R : ToRemove)
2442+
R->eraseFromParent();
23872443
}
23882444

23892445
void VPlanTransforms::handleUncountableEarlyExit(

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,9 @@ struct VPlanTransforms {
176176
BasicBlock *UncountableExitingBlock,
177177
VPRecipeBuilder &RecipeBuilder);
178178

179-
/// Lower abstract recipes to concrete ones, that can be codegen'd.
180-
static void convertToConcreteRecipes(VPlan &Plan);
179+
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
180+
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
181+
static void convertToConcreteRecipes(VPlan &Plan, Type &CanonicalIVTy);
181182

182183
/// Perform instcombine-like simplifications on recipes in \p Plan. Use \p
183184
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 5 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -155,33 +155,13 @@ void UnrollState::unrollWidenInductionByUF(
155155
if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
156156
FMFs = ID.getInductionBinOp()->getFastMathFlags();
157157

158-
VPValue *VectorStep = &Plan.getVF();
159-
VPBuilder Builder(PH);
160-
if (TypeInfo.inferScalarType(VectorStep) != IVTy) {
161-
Instruction::CastOps CastOp =
162-
IVTy->isFloatingPointTy() ? Instruction::UIToFP : Instruction::Trunc;
163-
VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
164-
ToSkip.insert(VectorStep->getDefiningRecipe());
165-
}
166-
167158
VPValue *ScalarStep = IV->getStepValue();
168-
auto *ConstStep = ScalarStep->isLiveIn()
169-
? dyn_cast<ConstantInt>(ScalarStep->getLiveInIRValue())
170-
: nullptr;
171-
if (!ConstStep || ConstStep->getValue() != 1) {
172-
if (TypeInfo.inferScalarType(ScalarStep) != IVTy) {
173-
ScalarStep =
174-
Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
175-
ToSkip.insert(ScalarStep->getDefiningRecipe());
176-
}
159+
VPBuilder Builder(PH);
160+
VPInstruction *VectorStep = Builder.createNaryOp(
161+
VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, IVTy, FMFs,
162+
IV->getDebugLoc());
177163

178-
unsigned MulOpc =
179-
IVTy->isFloatingPointTy() ? Instruction::FMul : Instruction::Mul;
180-
VPInstruction *Mul = Builder.createNaryOp(MulOpc, {VectorStep, ScalarStep},
181-
FMFs, IV->getDebugLoc());
182-
VectorStep = Mul;
183-
ToSkip.insert(Mul);
184-
}
164+
ToSkip.insert(VectorStep);
185165

186166
// Now create recipes to compute the induction steps for part 1 .. UF. Part 0
187167
// remains the header phi. Parts > 0 are computed by adding Step to the

0 commit comments

Comments
 (0)