Skip to content

Commit b27f82e

Browse files
committed
[VPlan] Add VPInstruction::StepVector and use it in VPWidenIntOrFpInductionRecipe
Split off from llvm#118638, this adds a new VPInstruction for integer step vectors (0,1,2,...), so that we can eventually model all the separate parts of VPWidenIntOrFpInductionRecipe in VPlan. The type of the element is specified through a sentinel value, as is done in llvm#119284. This is then used by VPWidenIntOrFpInductionRecipe, where we add it just before execution in convertToConcreteRecipes. We need a dummy placeholder operand so we have somewhere to pass it, but this should go away when llvm#118638 lands.
1 parent 71f4c7d commit b27f82e

10 files changed

+49
-21
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -881,6 +881,9 @@ class VPInstruction : public VPRecipeWithIRFlags,
881881
// Extracts the first active lane of a vector, where the first operand is
882882
// the predicate, and the second operand is the vector to extract.
883883
ExtractFirstActive,
884+
// Creates a step vector starting from 0 with a step of 1. The first operand
885+
// is a dummy constant that should be used to specify the element type.
886+
StepVector,
884887
};
885888

886889
private:
@@ -1769,6 +1772,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
17691772
Step, IndDesc, DL),
17701773
Trunc(nullptr) {
17711774
addOperand(VF);
1775+
addOperand(VF); // Dummy StepVector replaced in convertToConcreteRecipes
17721776
}
17731777

17741778
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
@@ -1778,6 +1782,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
17781782
Step, IndDesc, DL),
17791783
Trunc(Trunc) {
17801784
addOperand(VF);
1785+
addOperand(VF); // Dummy StepVector replaced in convertToConcreteRecipes
17811786
}
17821787

17831788
~VPWidenIntOrFpInductionRecipe() override = default;
@@ -1803,10 +1808,14 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
18031808
VPValue *getVFValue() { return getOperand(2); }
18041809
const VPValue *getVFValue() const { return getOperand(2); }
18051810

1811+
VPValue *getStepVector() { return getOperand(3); }
1812+
const VPValue *getStepVector() const { return getOperand(3); }
1813+
void setStepVector(VPValue *V) { setOperand(3, V); }
1814+
18061815
VPValue *getSplatVFValue() {
18071816
// If the recipe has been unrolled (6 operands), return the VPValue for the
18081817
// induction increment.
1809-
return getNumOperands() == 5 ? getOperand(3) : nullptr;
1818+
return getNumOperands() == 6 ? getOperand(4) : nullptr;
18101819
}
18111820

18121821
/// Returns the first defined value as TruncInst, if it is one or nullptr
@@ -1828,7 +1837,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
18281837
/// the last unrolled part, if it exists. Returns itself if unrolling did not
18291838
/// take place.
18301839
VPValue *getLastUnrolledPartOperand() {
1831-
return getNumOperands() == 5 ? getOperand(4) : this;
1840+
return getNumOperands() == 6 ? getOperand(5) : this;
18321841
}
18331842

18341843
/// Returns true if the recipe only uses the first lane of operand \p Op.

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
7777
case VPInstruction::CalculateTripCountMinusVF:
7878
case VPInstruction::CanonicalIVIncrementForPart:
7979
case VPInstruction::AnyOf:
80+
case VPInstruction::StepVector:
8081
return SetResultTyFromOp();
8182
case VPInstruction::ExtractFirstActive:
8283
case VPInstruction::ExtractFromEnd: {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
713713
Builder.getInt64Ty(), Mask, true, "first.active.lane");
714714
return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value");
715715
}
716+
case VPInstruction::StepVector: {
717+
Type *EltTy = State.get(getOperand(0), true)->getType();
718+
return State.Builder.CreateStepVector(VectorType::get(EltTy, State.VF));
719+
}
720+
716721
default:
717722
llvm_unreachable("Unsupported opcode for instruction");
718723
}
@@ -824,6 +829,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
824829
case VPInstruction::LogicalAnd:
825830
case VPInstruction::Not:
826831
case VPInstruction::PtrAdd:
832+
case VPInstruction::StepVector:
827833
return false;
828834
default:
829835
return true;
@@ -850,6 +856,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
850856
case VPInstruction::BranchOnCount:
851857
case VPInstruction::BranchOnCond:
852858
case VPInstruction::ResumePhi:
859+
case VPInstruction::StepVector:
853860
return true;
854861
case VPInstruction::PtrAdd:
855862
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
@@ -947,6 +954,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
947954
case VPInstruction::ExtractFirstActive:
948955
O << "extract-first-active";
949956
break;
957+
case VPInstruction::StepVector:
958+
O << "step-vector";
959+
break;
950960
default:
951961
O << Instruction::getOpcodeName(getOpcode());
952962
}
@@ -1710,7 +1720,8 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
17101720
/// (0 * Step, 1 * Step, 2 * Step, ...)
17111721
/// to each vector element of Val.
17121722
/// \p Opcode is relevant for FP induction variable.
1713-
static Value *getStepVector(Value *Val, Value *Step,
1723+
/// \p InitVec is an integer step vector from 0 with a step of 1.
1724+
static Value *getStepVector(Value *Val, Value *Step, Value *InitVec,
17141725
Instruction::BinaryOps BinOp, ElementCount VF,
17151726
IRBuilderBase &Builder) {
17161727
assert(VF.isVector() && "only vector VFs are supported");
@@ -1726,15 +1737,6 @@ static Value *getStepVector(Value *Val, Value *Step,
17261737

17271738
SmallVector<Constant *, 8> Indices;
17281739

1729-
// Create a vector of consecutive numbers from zero to VF.
1730-
VectorType *InitVecValVTy = ValVTy;
1731-
if (STy->isFloatingPointTy()) {
1732-
Type *InitVecValSTy =
1733-
IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
1734-
InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1735-
}
1736-
Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1737-
17381740
if (STy->isIntegerTy()) {
17391741
Step = Builder.CreateVectorSplat(VLen, Step);
17401742
assert(Step->getType() == Val->getType() && "Invalid step vec");
@@ -1800,8 +1802,11 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
18001802
}
18011803

18021804
Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1803-
Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
1804-
State.VF, State.Builder);
1805+
assert(cast<VPInstruction>(getStepVector())->getOpcode() ==
1806+
VPInstruction::StepVector);
1807+
Value *SteppedStart =
1808+
::getStepVector(SplatStart, Step, State.get(getStepVector()),
1809+
ID.getInductionOpcode(), State.VF, State.Builder);
18051810

18061811
// We create vector phi nodes for both integer and floating-point induction
18071812
// variables. Here, we determine the kind of arithmetic we will perform.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2033,6 +2033,19 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
20332033
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
20342034
vp_depth_first_deep(Plan.getEntry()))) {
20352035
for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {
2036+
if (auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
2037+
Type *Ty = IVR->getTruncInst() ? IVR->getTruncInst()->getType()
2038+
: IVR->getPHINode()->getType();
2039+
if (Ty->isFloatingPointTy())
2040+
Ty = IntegerType::get(Ty->getContext(), Ty->getScalarSizeInBits());
2041+
VPValue *TyVal = Plan.getOrAddLiveIn(Constant::getNullValue(Ty));
2042+
2043+
VPInstruction *StepVector =
2044+
new VPInstruction(VPInstruction::StepVector, {TyVal});
2045+
Plan.getVectorPreheader()->appendRecipe(StepVector);
2046+
IVR->setStepVector(StepVector);
2047+
}
2048+
20362049
if (!isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(&R))
20372050
continue;
20382051
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);

llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
2626
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
2727
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
2828
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
29+
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
2930
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
3031
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
31-
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
3232
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP8]], splat (i32 1)
3333
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]]
3434
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1452,11 +1452,11 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
14521452
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
14531453
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
14541454
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
1455+
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
14551456
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
14561457
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2
14571458
; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1
14581459
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP13]]
1459-
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
14601460
; CHECK-NEXT: [[TMP15:%.*]] = shl <vscale x 4 x i64> [[TMP14]], splat (i64 1)
14611461
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3
14621462
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0

llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ define void @dead_load(ptr %p, i16 %start) {
3333
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8
3434
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[N_VEC]], 3
3535
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START_EXT]], [[TMP18]]
36+
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
3637
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[START_EXT]], i64 0
3738
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
38-
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
3939
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 8 x i64> [[TMP15]], splat (i64 3)
4040
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP17]]
4141
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP14]]

llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,9 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 {
7070
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
7171
; CHECK-NEXT: [[TMP50:%.*]] = mul i32 [[DOTCAST]], 3
7272
; CHECK-NEXT: [[IND_END22:%.*]] = add i32 [[X_I32]], [[TMP50]]
73+
; CHECK-NEXT: [[TMP53:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
7374
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[X_I64]], i64 0
7475
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
75-
; CHECK-NEXT: [[TMP53:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
7676
; CHECK-NEXT: [[TMP55:%.*]] = mul <vscale x 8 x i64> [[TMP53]], splat (i64 3)
7777
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP55]]
7878
; CHECK-NEXT: [[TMP58:%.*]] = mul i64 3, [[TMP52]]

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -600,8 +600,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
600600
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
601601
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
602602
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
603-
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
604603
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
604+
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
605605
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
606606
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
607607
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
@@ -792,8 +792,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
792792
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
793793
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
794794
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
795-
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
796795
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
796+
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
797797
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
798798
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
799799
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) {
663663
; CHECK-NEXT: <x1> vector loop: {
664664
; CHECK-NEXT: vector.body:
665665
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
666-
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, vp<[[EXP_SCEV]]>, vp<[[VF]]> (truncated to i8)
666+
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, vp<[[EXP_SCEV]]>, vp<[[VF]]>, vp<[[VF]]> (truncated to i8)
667667
; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * vp<[[EXP_SCEV]]>
668668
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[EXP_SCEV]]>
669669
; CHECK-NEXT: WIDEN ir<%v3> = add nuw ir<%iv>, ir<1>

0 commit comments

Comments
 (0)