[VPlan] Omit zero add in VPWidenIntOrFpInductionRecipe #119668

Conversation
I'm not sure if getStepVector was used for other things in the past where StartIdx was non-zero, but nowadays VPWidenIntOrFpInductionRecipe is its only user, and it just passes zero. I presume InstCombine was already catching this, so hopefully removing it won't affect codegen.
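To make the change concrete, here is a minimal IR sketch of the induction step sequence before and after the patch; the function names, value names, and the nxv4i64 type are illustrative assumptions, not taken from the patch itself:

declare <vscale x 4 x i64> @llvm.stepvector.nxv4i64()

; Before: getStepVector splats StartIdx (which is always 0) and adds it to the step vector.
define <vscale x 4 x i64> @step_before(<vscale x 4 x i64> %step.splat) {
  %stepvec = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
  %idx = add <vscale x 4 x i64> %stepvec, zeroinitializer  ; no-op add of the splatted zero
  %scaled = mul <vscale x 4 x i64> %idx, %step.splat
  ret <vscale x 4 x i64> %scaled
}

; After: the zero add is omitted and the step vector is scaled directly.
define <vscale x 4 x i64> @step_after(<vscale x 4 x i64> %step.splat) {
  %stepvec = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
  %scaled = mul <vscale x 4 x i64> %stepvec, %step.splat
  ret <vscale x 4 x i64> %scaled
}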
@llvm/pr-subscribers-vectorizers

Author: Luke Lau (lukel97)

Changes

I'm not sure if getStepVector was used for other things in the past where StartIdx was non-zero, but nowadays VPWidenIntOrFpInductionRecipe is its only user, and it just passes zero. I presume InstCombine was already catching this, so hopefully removing it won't affect codegen.

Patch is 43.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119668.diff

17 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index e882368544e815..54392bcac4417f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1576,10 +1576,10 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
}
/// This function adds
-/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
+/// (0 * Step, 1 * Step, 2 * Step, ...)
/// to each vector element of Val. The sequence starts at StartIndex.
/// \p Opcode is relevant for FP induction variable.
-static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
+static Value *getStepVector(Value *Val, Value *Step,
Instruction::BinaryOps BinOp, ElementCount VF,
IRBuilderBase &Builder) {
assert(VF.isVector() && "only vector VFs are supported");
@@ -1604,11 +1604,7 @@ static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
}
Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
- // Splat the StartIdx
- Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
-
if (STy->isIntegerTy()) {
- InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
Step = Builder.CreateVectorSplat(VLen, Step);
assert(Step->getType() == Val->getType() && "Invalid step vec");
// FIXME: The newly created binary instructions should contain nsw/nuw
@@ -1621,7 +1617,6 @@ static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
"Binary Opcode should be specified for FP induction");
InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
- InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
Step = Builder.CreateVectorSplat(VLen, Step);
Value *MulOp = Builder.CreateFMul(InitVec, Step);
@@ -1672,10 +1667,9 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
}
- Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
- Value *SteppedStart = getStepVector(
- SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
+ Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
+ State.VF, State.Builder);
// We create vector phi nodes for both integer and floating-point induction
// variables. Here, we determine the kind of arithmetic we will perform.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index 1948211858d446..ac7f1478cf68c3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -18,8 +18,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 8 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP9]], splat (i64 1)
+; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
@@ -105,8 +104,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 8 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP9]], splat (i64 1)
+; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index 5e7b4ca9d300b9..2da89120182c01 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -125,8 +125,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i64> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP16]], splat (i64 1)
+; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
@@ -252,8 +251,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]])
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i64> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP16]], splat (i64 1)
+; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP20]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll
index 0db4799a0a8fe8..3302103873bd33 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll
@@ -21,36 +21,35 @@ define void @foo() {
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i64> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], splat (i64 1)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_LATCH:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_LATCH]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x float], ptr @A, i64 0, <vscale x 4 x i64> [[VEC_IND]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison)
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr @A, i64 0, <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison)
; CHECK-NEXT: br label [[INNER_LOOP1:%.*]]
; CHECK: inner_loop1:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP13:%.*]], [[INNER_LOOP1]] ]
-; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x float> [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] ], [ [[TMP12:%.*]], [[INNER_LOOP1]] ]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, <vscale x 4 x i64> [[VEC_PHI]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison)
-; CHECK-NEXT: [[TMP12]] = fmul <vscale x 4 x float> [[VEC_PHI2]], [[WIDE_MASKED_GATHER3]]
-; CHECK-NEXT: [[TMP13]] = add nuw nsw <vscale x 4 x i64> [[VEC_PHI]], splat (i64 1)
-; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <vscale x 4 x i64> [[TMP13]], splat (i64 512)
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0
-; CHECK-NEXT: br i1 [[TMP15]], label [[VECTOR_LATCH]], label [[INNER_LOOP1]]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP12:%.*]], [[INNER_LOOP1]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x float> [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] ], [ [[TMP11:%.*]], [[INNER_LOOP1]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, <vscale x 4 x i64> [[VEC_PHI]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> poison)
+; CHECK-NEXT: [[TMP11]] = fmul <vscale x 4 x float> [[VEC_PHI2]], [[WIDE_MASKED_GATHER3]]
+; CHECK-NEXT: [[TMP12]] = add nuw nsw <vscale x 4 x i64> [[VEC_PHI]], splat (i64 1)
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <vscale x 4 x i64> [[TMP12]], splat (i64 512)
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[TMP13]], i32 0
+; CHECK-NEXT: br i1 [[TMP14]], label [[VECTOR_LATCH]], label [[INNER_LOOP1]]
; CHECK: vector.latch:
-; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x float> [ [[TMP12]], [[INNER_LOOP1]] ]
-; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI4]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x float> [ [[TMP11]], [[INNER_LOOP1]] ]
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VEC_PHI4]], <vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
index 41c92d61663325..a426cdf08062c8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
@@ -29,8 +29,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
-; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 2 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP9]], splat (i32 1)
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP8]], splat (i32 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP13]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
index 2850aca9185548..e5b9812604f164 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -225,8 +225,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]])
; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i64> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = mul <vscale x 4 x i64> [[TMP14]], splat (i64 4)
+; CHECK-NEXT: [[TMP15:%.*]] = mul <vscale x 4 x i64> [[TMP13]], splat (i64 4)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 4, [[TMP7]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP18]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
index 2e0eee1f862927..00fc704a92b057 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
@@ -26,8 +26,7 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 8 x i64> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP12]], splat (i64 3)
+; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP11]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0
@@ -121,8 +120,7 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], splat (i64 3)
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
@@ -216,8 +214,7 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], splat (i64 3)
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
@@ -321,8 +318,7 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 8 x i64> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP12]], splat (i64 3)
+; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP11]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0
@@ -428,8 +424,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], splat (i64 3)
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP7]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
@@ -543,8 +538,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 8 x i64> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP12]], splat (i64 3)
+; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP11]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP10]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0
@@ -861,8 +855,7 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 {
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 2 x i...
[truncated]
@llvm/pr-subscribers-llvm-transforms

Author: Luke Lau (lukel97)
LG
Looks good if getStepVector only has one user, VPWidenIntOrFpInductionRecipe::execute.
@@ -1576,10 +1576,10 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
 }

 /// This function adds
-/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
+/// (0 * Step, 1 * Step, 2 * Step, ...)
 /// to each vector element of Val. The sequence starts at StartIndex.
Suggested change:
-/// to each vector element of Val. The sequence starts at StartIndex.
+/// to each vector element of Val. The sequence starts at zero.