Skip to content

Commit a794ee4

Browse files
authored
[VPlan] Add VPValue for VF, use it for VPWidenIntOrFpInductionRecipe. (llvm#95305)
Similar to VFxUF, also add a VF VPValue to VPlan and use it to get the runtime VF in VPWidenIntOrFpInductionRecipe. Code for VF is only generated if there are users of VF, to avoid unnecessary test changes.

PR: llvm#95305
1 parent a99d666 commit a794ee4

34 files changed

162 additions and 215 deletions

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+4-2
Original file line number | Diff line number | Diff line change
@@ -8184,10 +8184,12 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
81848184
VPValue *Step =
81858185
vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep(), SE);
81868186
if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) {
8187-
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc, TruncI);
8187+
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
8188+
IndDesc, TruncI);
81888189
}
81898190
assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here");
8190-
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc);
8191+
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
8192+
IndDesc);
81918193
}
81928194

81938195
VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(

llvm/lib/Transforms/Vectorize/VPlan.cpp

+22-4
Original file line number | Diff line number | Diff line change
@@ -921,11 +921,11 @@ VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE,
921921
void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
922922
Value *CanonicalIVStartValue,
923923
VPTransformState &State) {
924+
Type *TCTy = TripCountV->getType();
924925
// Check if the backedge taken count is needed, and if so build it.
925926
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
926927
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
927-
auto *TCMO = Builder.CreateSub(TripCountV,
928-
ConstantInt::get(TripCountV->getType(), 1),
928+
auto *TCMO = Builder.CreateSub(TripCountV, ConstantInt::get(TCTy, 1),
929929
"trip.count.minus.1");
930930
BackedgeTakenCount->setUnderlyingValue(TCMO);
931931
}
@@ -935,8 +935,17 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
935935
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
936936
// FIXME: Model VF * UF computation completely in VPlan.
937937
assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
938-
VFxUF.setUnderlyingValue(
939-
createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF));
938+
if (VF.getNumUsers()) {
939+
Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
940+
VF.setUnderlyingValue(RuntimeVF);
941+
VFxUF.setUnderlyingValue(
942+
State.UF > 1
943+
? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, State.UF))
944+
: RuntimeVF);
945+
} else {
946+
VFxUF.setUnderlyingValue(
947+
createStepForVF(Builder, TCTy, State.VF, State.UF));
948+
}
940949

941950
// When vectorizing the epilogue loop, the canonical induction start value
942951
// needs to be changed from zero to the value after the main vector loop.
@@ -1099,6 +1108,12 @@ InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
10991108
void VPlan::printLiveIns(raw_ostream &O) const {
11001109
VPSlotTracker SlotTracker(this);
11011110

1111+
if (VF.getNumUsers() > 0) {
1112+
O << "\nLive-in ";
1113+
VF.printAsOperand(O, SlotTracker);
1114+
O << " = VF";
1115+
}
1116+
11021117
if (VFxUF.getNumUsers() > 0) {
11031118
O << "\nLive-in ";
11041119
VFxUF.printAsOperand(O, SlotTracker);
@@ -1241,6 +1256,7 @@ VPlan *VPlan::duplicate() {
12411256
NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
12421257
}
12431258
Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1259+
Old2NewVPValues[&VF] = &NewPlan->VF;
12441260
Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
12451261
if (BackedgeTakenCount) {
12461262
NewPlan->BackedgeTakenCount = new VPValue();
@@ -1551,6 +1567,8 @@ void VPSlotTracker::assignName(const VPValue *V) {
15511567
}
15521568

15531569
void VPSlotTracker::assignNames(const VPlan &Plan) {
1570+
if (Plan.VF.getNumUsers() > 0)
1571+
assignName(&Plan.VF);
15541572
if (Plan.VFxUF.getNumUsers() > 0)
15551573
assignName(&Plan.VFxUF);
15561574
assignName(&Plan.VectorTripCount);

llvm/lib/Transforms/Vectorize/VPlan.h

+15-4
Original file line number | Diff line number | Diff line change
@@ -1855,25 +1855,27 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
18551855

18561856
public:
18571857
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
1858-
const InductionDescriptor &IndDesc)
1858+
VPValue *VF, const InductionDescriptor &IndDesc)
18591859
: VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
18601860
Trunc(nullptr), IndDesc(IndDesc) {
18611861
addOperand(Step);
1862+
addOperand(VF);
18621863
}
18631864

18641865
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
1865-
const InductionDescriptor &IndDesc,
1866+
VPValue *VF, const InductionDescriptor &IndDesc,
18661867
TruncInst *Trunc)
18671868
: VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
18681869
IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
18691870
addOperand(Step);
1871+
addOperand(VF);
18701872
}
18711873

18721874
~VPWidenIntOrFpInductionRecipe() override = default;
18731875

18741876
VPWidenIntOrFpInductionRecipe *clone() override {
1875-
return new VPWidenIntOrFpInductionRecipe(IV, getStartValue(),
1876-
getStepValue(), IndDesc, Trunc);
1877+
return new VPWidenIntOrFpInductionRecipe(
1878+
IV, getStartValue(), getStepValue(), getVFValue(), IndDesc, Trunc);
18771879
}
18781880

18791881
VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
@@ -1906,6 +1908,9 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
19061908
VPValue *getStepValue() { return getOperand(1); }
19071909
const VPValue *getStepValue() const { return getOperand(1); }
19081910

1911+
VPValue *getVFValue() { return getOperand(2); }
1912+
const VPValue *getVFValue() const { return getOperand(2); }
1913+
19091914
/// Returns the first defined value as TruncInst, if it is one or nullptr
19101915
/// otherwise.
19111916
TruncInst *getTruncInst() { return Trunc; }
@@ -3376,6 +3381,9 @@ class VPlan {
33763381
/// Represents the vector trip count.
33773382
VPValue VectorTripCount;
33783383

3384+
/// Represents the vectorization factor of the loop.
3385+
VPValue VF;
3386+
33793387
/// Represents the loop-invariant VF * UF of the vector loop region.
33803388
VPValue VFxUF;
33813389

@@ -3471,6 +3479,9 @@ class VPlan {
34713479
/// The vector trip count.
34723480
VPValue &getVectorTripCount() { return VectorTripCount; }
34733481

3482+
/// Returns the VF of the vector loop region.
3483+
VPValue &getVF() { return VF; };
3484+
34743485
/// Returns VF * UF of the vector loop region.
34753486
VPValue &getVFxUF() { return VFxUF; }
34763487

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

+6-11
Original file line number | Diff line number | Diff line change
@@ -1416,14 +1416,6 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
14161416
: ConstantFP::get(Ty, C);
14171417
}
14181418

1419-
static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
1420-
ElementCount VF) {
1421-
assert(FTy->isFloatingPointTy() && "Expected floating point type!");
1422-
Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
1423-
Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
1424-
return B.CreateUIToFP(RuntimeVF, FTy);
1425-
}
1426-
14271419
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
14281420
assert(!State.Instance && "Int or FP induction being replicated.");
14291421

@@ -1481,11 +1473,11 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
14811473
// Multiply the vectorization factor by the step using integer or
14821474
// floating-point arithmetic as appropriate.
14831475
Type *StepType = Step->getType();
1484-
Value *RuntimeVF;
1476+
Value *RuntimeVF = State.get(getVFValue(), {0, 0});
14851477
if (Step->getType()->isFloatingPointTy())
1486-
RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
1478+
RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
14871479
else
1488-
RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
1480+
RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
14891481
Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
14901482

14911483
// Create a vector splat to use in the induction update.
@@ -1539,6 +1531,9 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
15391531

15401532
O << ", ";
15411533
getStepValue()->printAsOperand(O, SlotTracker);
1534+
1535+
O << ", ";
1536+
getVFValue()->printAsOperand(O, SlotTracker);
15421537
}
15431538
#endif
15441539

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,8 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
6161
VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
6262
VPValue *Step =
6363
vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
64-
NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II);
64+
NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step,
65+
&Plan->getVF(), *II);
6566
} else {
6667
assert(isa<VPInstruction>(&Ingredient) &&
6768
"only VPInstructions expected here");

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

+2-6
Original file line number | Diff line number | Diff line change
@@ -21,9 +21,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
2121
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer
2222
; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
2323
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP9]]
24-
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
25-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
26-
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP11]]
24+
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
2725
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
2826
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
2927
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[VAL]], i64 0
@@ -112,9 +110,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
112110
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer
113111
; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
114112
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP9]]
115-
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
116-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
117-
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP11]]
113+
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
118114
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
119115
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
120116
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[VAL]], i64 0

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

+2-6
Original file line number | Diff line number | Diff line change
@@ -142,9 +142,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
142142
; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i64> [[TMP15]], zeroinitializer
143143
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP16]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
144144
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
145-
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
146-
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
147-
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP19]]
145+
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
148146
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
149147
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
150148
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
@@ -271,9 +269,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
271269
; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i64> [[TMP15]], zeroinitializer
272270
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP16]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
273271
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
274-
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
275-
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
276-
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP19]]
272+
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
277273
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
278274
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
279275
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0

llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll

+1-3
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,7 @@ define void @foo() {
2424
; CHECK-NEXT: [[TMP5:%.*]] = add <vscale x 4 x i64> [[TMP4]], zeroinitializer
2525
; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
2626
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]]
27-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
28-
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
29-
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]]
27+
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP19]]
3028
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
3129
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
3230
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll

+1-3
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
3232
; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 2 x i32> [[TMP8]], zeroinitializer
3333
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP9]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
3434
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]]
35-
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
36-
; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 2
37-
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP12]]
35+
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]
3836
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP13]], i64 0
3937
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
4038
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

+1-2
Original file line number | Diff line number | Diff line change
@@ -293,10 +293,9 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
293293
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8
294294
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
295295
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
296+
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP3]], 2
296297
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3
297298
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
298-
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
299-
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
300299
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP7]], i64 0
301300
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
302301
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll

+6-6
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,14 @@ define void @induction_i7(ptr %dst) #0 {
1818
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
1919
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7
2020
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
21-
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
21+
; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2
22+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
2223
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i8> @llvm.stepvector.nxv2i8()
2324
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i8> [[TMP6]] to <vscale x 2 x i7>
2425
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 2 x i7> [[TMP7]], zeroinitializer
2526
; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i7> [[TMP8]], shufflevector (<vscale x 2 x i7> insertelement (<vscale x 2 x i7> poison, i7 1, i64 0), <vscale x 2 x i7> poison, <vscale x 2 x i32> zeroinitializer)
2627
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i7> zeroinitializer, [[TMP9]]
27-
; CHECK-NEXT: [[TMP10:%.*]] = call i7 @llvm.vscale.i7()
28-
; CHECK-NEXT: [[TMP11:%.*]] = mul i7 [[TMP10]], 2
28+
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP40]] to i7
2929
; CHECK-NEXT: [[TMP12:%.*]] = mul i7 1, [[TMP11]]
3030
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i7> poison, i7 [[TMP12]], i64 0
3131
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i7> [[DOTSPLATINSERT]], <vscale x 2 x i7> poison, <vscale x 2 x i32> zeroinitializer
@@ -92,14 +92,14 @@ define void @induction_i3_zext(ptr %dst) #0 {
9292
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
9393
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3
9494
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
95-
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
95+
; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2
96+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
9697
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i8> @llvm.stepvector.nxv2i8()
9798
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i8> [[TMP6]] to <vscale x 2 x i3>
9899
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 2 x i3> [[TMP7]], zeroinitializer
99100
; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i3> [[TMP8]], shufflevector (<vscale x 2 x i3> insertelement (<vscale x 2 x i3> poison, i3 1, i64 0), <vscale x 2 x i3> poison, <vscale x 2 x i32> zeroinitializer)
100101
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i3> zeroinitializer, [[TMP9]]
101-
; CHECK-NEXT: [[TMP10:%.*]] = call i3 @llvm.vscale.i3()
102-
; CHECK-NEXT: [[TMP11:%.*]] = mul i3 [[TMP10]], 2
102+
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP40]] to i3
103103
; CHECK-NEXT: [[TMP12:%.*]] = mul i3 1, [[TMP11]]
104104
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i3> poison, i3 [[TMP12]], i64 0
105105
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i3> [[DOTSPLATINSERT]], <vscale x 2 x i3> poison, <vscale x 2 x i32> zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll

+1-3
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,7 @@ define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly
2626
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
2727
; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2
2828
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
29-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
30-
; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 2
31-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
29+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
3230
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
3331
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
3432
; CHECK: vector.body:

0 commit comments

Comments
 (0)