Skip to content

Commit 68bbf28

Browse files
committed
[VPlan] Add VPInstruction::StepVector and use it in VPWidenIntOrFpInductionRecipe
Split off from llvm#118638, this adds a new VPInstruction for integer step vectors (0,1,2,...), so that we can eventually model all the separate parts of VPWidenIntOrFpInductionRecipe in VPlan. This is then used by VPWidenIntOrFpInductionRecipe, where we add it just before execution in convertToConcreteRecipes. We need a dummy placeholder operand so we have somewhere to pass it, but this should go away when llvm#118638 lands.
1 parent 940108b commit 68bbf28

9 files changed

+54
-23
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -898,6 +898,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
898898
/// Scale the first operand (vector step) by the second operand
899899
/// (scalar-step). Casts both operands to the result type if needed.
900900
WideIVStep,
901+
// Creates a step vector starting from 0 with a step of 1.
902+
StepVector,
901903

902904
};
903905

@@ -1063,6 +1065,9 @@ class VPInstructionWithType : public VPInstruction {
10631065
: VPInstruction(Opcode, Operands, FMFs, DL, Name), ResultTy(ResultTy) {}
10641066

10651067
static inline bool classof(const VPRecipeBase *R) {
1068+
if (isa<VPInstruction>(R) &&
1069+
cast<VPInstruction>(R)->getOpcode() == VPInstruction::StepVector)
1070+
return true;
10661071
// VPInstructionWithType are VPInstructions with specific opcodes requiring
10671072
// type information.
10681073
if (R->isScalarCast())
@@ -1836,6 +1841,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
18361841
Step, IndDesc, DL),
18371842
Trunc(nullptr) {
18381843
addOperand(VF);
1844+
addOperand(VF); // Dummy StepVector replaced in convertToConcreteRecipes
18391845
}
18401846

18411847
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
@@ -1845,6 +1851,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
18451851
Step, IndDesc, DL),
18461852
Trunc(Trunc) {
18471853
addOperand(VF);
1854+
addOperand(VF); // Dummy StepVector replaced in convertToConcreteRecipes
18481855
SmallVector<std::pair<unsigned, MDNode *>> Metadata;
18491856
(void)Metadata;
18501857
if (Trunc)
@@ -1875,10 +1882,14 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
18751882
VPValue *getVFValue() { return getOperand(2); }
18761883
const VPValue *getVFValue() const { return getOperand(2); }
18771884

1885+
VPValue *getStepVector() { return getOperand(3); }
1886+
const VPValue *getStepVector() const { return getOperand(3); }
1887+
void setStepVector(VPValue *V) { setOperand(3, V); }
1888+
18781889
VPValue *getSplatVFValue() {
18791890
// If the recipe has been unrolled (6 operands), return the VPValue for the
18801891
// induction increment.
1881-
return getNumOperands() == 5 ? getOperand(3) : nullptr;
1892+
return getNumOperands() == 6 ? getOperand(4) : nullptr;
18821893
}
18831894

18841895
/// Returns the first defined value as TruncInst, if it is one or nullptr
@@ -1900,7 +1911,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
19001911
/// the last unrolled part, if it exists. Returns itself if unrolling did not
19011912
/// take place.
19021913
VPValue *getLastUnrolledPartOperand() {
1903-
return getNumOperands() == 5 ? getOperand(4) : this;
1914+
return getNumOperands() == 6 ? getOperand(5) : this;
19041915
}
19051916
};
19061917

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -930,6 +930,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
930930
case VPInstruction::Not:
931931
case VPInstruction::PtrAdd:
932932
case VPInstruction::WideIVStep:
933+
case VPInstruction::StepVector:
933934
return false;
934935
default:
935936
return true;
@@ -1078,8 +1079,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
10781079

10791080
void VPInstructionWithType::execute(VPTransformState &State) {
10801081
State.setDebugLocFrom(getDebugLoc());
1081-
assert(vputils::onlyFirstLaneUsed(this) &&
1082-
"Codegen only implemented for first lane.");
10831082
switch (getOpcode()) {
10841083
case Instruction::ZExt:
10851084
case Instruction::Trunc: {
@@ -1089,6 +1088,12 @@ void VPInstructionWithType::execute(VPTransformState &State) {
10891088
State.set(this, Cast, VPLane(0));
10901089
break;
10911090
}
1091+
case VPInstruction::StepVector: {
1092+
Value *StepVector =
1093+
State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF));
1094+
State.set(this, StepVector);
1095+
break;
1096+
}
10921097
default:
10931098
llvm_unreachable("opcode not implemented yet");
10941099
}
@@ -1106,6 +1111,9 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
11061111
O << "wide-iv-step ";
11071112
printOperands(O, SlotTracker);
11081113
break;
1114+
case VPInstruction::StepVector:
1115+
O << "step-vector " << *ResultTy;
1116+
break;
11091117
default:
11101118
assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
11111119
O << Instruction::getOpcodeName(getOpcode()) << " ";
@@ -1875,7 +1883,8 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
18751883
/// (0 * Step, 1 * Step, 2 * Step, ...)
18761884
/// to each vector element of Val.
18771885
/// \p Opcode is relevant for FP induction variable.
1878-
static Value *getStepVector(Value *Val, Value *Step,
1886+
/// \p InitVec is an integer step vector from 0 with a step of 1.
1887+
static Value *getStepVector(Value *Val, Value *Step, Value *InitVec,
18791888
Instruction::BinaryOps BinOp, ElementCount VF,
18801889
IRBuilderBase &Builder) {
18811890
assert(VF.isVector() && "only vector VFs are supported");
@@ -1891,15 +1900,6 @@ static Value *getStepVector(Value *Val, Value *Step,
18911900

18921901
SmallVector<Constant *, 8> Indices;
18931902

1894-
// Create a vector of consecutive numbers from zero to VF.
1895-
VectorType *InitVecValVTy = ValVTy;
1896-
if (STy->isFloatingPointTy()) {
1897-
Type *InitVecValSTy =
1898-
IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
1899-
InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1900-
}
1901-
Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1902-
19031903
if (STy->isIntegerTy()) {
19041904
Step = Builder.CreateVectorSplat(VLen, Step);
19051905
assert(Step->getType() == Val->getType() && "Invalid step vec");
@@ -1965,8 +1965,11 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
19651965
}
19661966

19671967
Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1968-
Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
1969-
State.VF, State.Builder);
1968+
assert(cast<VPInstruction>(getStepVector())->getOpcode() ==
1969+
VPInstruction::StepVector);
1970+
Value *SteppedStart =
1971+
::getStepVector(SplatStart, Step, State.get(getStepVector()),
1972+
ID.getInductionOpcode(), State.VF, State.Builder);
19701973

19711974
// We create vector phi nodes for both integer and floating-point induction
19721975
// variables. Here, we determine the kind of arithmetic we will perform.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2410,6 +2410,23 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
24102410
continue;
24112411
}
24122412

2413+
if (auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
2414+
// Infer an up-to-date type since
2415+
// optimizeVectorInductionWidthForTCAndVFUF may have truncated the start
2416+
// and step values.
2417+
Type *Ty = TypeInfo.inferScalarType(IVR->getStartValue());
2418+
if (TruncInst *Trunc = IVR->getTruncInst())
2419+
Ty = Trunc->getType();
2420+
if (Ty->isFloatingPointTy())
2421+
Ty = IntegerType::get(Ty->getContext(), Ty->getScalarSizeInBits());
2422+
VPInstruction *StepVector = new VPInstructionWithType(
2423+
VPInstruction::StepVector, {}, Ty, R.getDebugLoc());
2424+
2425+
Plan.getVectorPreheader()->appendRecipe(StepVector);
2426+
IVR->setStepVector(StepVector);
2427+
continue;
2428+
}
2429+
24132430
VPValue *VectorStep;
24142431
VPValue *ScalarStep;
24152432
if (!match(&R, m_VPInstruction<VPInstruction::WideIVStep>(

llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
2626
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
2727
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
2828
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
29+
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
2930
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
3031
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
31-
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
3232
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP8]], splat (i32 1)
3333
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]]
3434
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1283,11 +1283,11 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
12831283
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
12841284
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
12851285
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
1286+
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
12861287
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
12871288
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2
12881289
; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1
12891290
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP13]]
1290-
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
12911291
; CHECK-NEXT: [[TMP15:%.*]] = shl <vscale x 4 x i64> [[TMP14]], splat (i64 1)
12921292
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3
12931293
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0

llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ define void @dead_load(ptr %p, i16 %start) {
3333
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8
3434
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[N_VEC]], 3
3535
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START_EXT]], [[TMP18]]
36+
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
3637
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[START_EXT]], i64 0
3738
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
38-
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
3939
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 8 x i64> [[TMP15]], splat (i64 3)
4040
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP17]]
4141
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP14]]

llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,9 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 {
7070
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
7171
; CHECK-NEXT: [[TMP50:%.*]] = mul i32 [[DOTCAST]], 3
7272
; CHECK-NEXT: [[IND_END22:%.*]] = add i32 [[X_I32]], [[TMP50]]
73+
; CHECK-NEXT: [[TMP53:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
7374
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[X_I64]], i64 0
7475
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
75-
; CHECK-NEXT: [[TMP53:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
7676
; CHECK-NEXT: [[TMP55:%.*]] = mul <vscale x 8 x i64> [[TMP53]], splat (i64 3)
7777
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP55]]
7878
; CHECK-NEXT: [[TMP58:%.*]] = mul i64 3, [[TMP52]]

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -582,8 +582,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
582582
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
583583
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
584584
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
585-
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
586585
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
586+
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
587587
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
588588
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
589589
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
@@ -772,8 +772,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
772772
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
773773
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
774774
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
775-
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
776775
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
776+
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
777777
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
778778
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
779779
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) {
457457
; CHECK-NEXT: <x1> vector loop: {
458458
; CHECK-NEXT: vector.body:
459459
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
460-
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, vp<[[EXP_SCEV]]>, vp<[[VF]]> (truncated to i8)
460+
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, vp<[[EXP_SCEV]]>, vp<[[VF]]>, vp<[[VF]]> (truncated to i8)
461461
; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * vp<[[EXP_SCEV]]>
462462
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[EXP_SCEV]]>
463463
; CHECK-NEXT: WIDEN ir<%v3> = add nuw ir<%iv>, ir<1>

0 commit comments

Comments
 (0)