Skip to content

Commit b95cce9

Browse files
committed
[VPlan] Update wide induction increment recipes to use the same step as the wide IV.
Update wide induction increments to use the same step as the corresponding wide induction. This enables detecting induction increments directly in VPlan and removes redundant splats.
1 parent c19e0d6 commit b95cce9

File tree

2 files changed

+16
-4
lines changed

2 files changed

+16
-4
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9311,6 +9311,20 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93119311
"VPBasicBlock");
93129312
RecipeBuilder.fixHeaderPhis();
93139313

9314+
// Update wide induction increments to use the same step as the corresponding
9315+
// wide induction. This enables detecting induction increments directly in
9316+
// VPlan and removes redundant splats.
9317+
for (const auto &[Phi, ID] : Legal->getInductionVars()) {
9318+
auto *IVInc = cast<Instruction>(
9319+
Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
9320+
if (IVInc->getOperand(0) != Phi || IVInc->getOpcode() != Instruction::Add)
9321+
continue;
9322+
VPWidenInductionRecipe *WideIV =
9323+
cast<VPWidenInductionRecipe>(RecipeBuilder.getRecipe(Phi));
9324+
VPRecipeBase *R = RecipeBuilder.getRecipe(IVInc);
9325+
R->setOperand(1, WideIV->getStepValue());
9326+
}
9327+
93149328
if (auto *UncountableExitingBlock =
93159329
Legal->getUncountableEarlyExitingBlock()) {
93169330
VPlanTransforms::handleUncountableEarlyExit(

llvm/test/Transforms/LoopVectorize/X86/induction-step.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,16 +21,14 @@ define i16 @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
2121
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
2222
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> <i16 0, i16 1, i16 2, i16 3>, [[DOTSPLAT]]
2323
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i16> zeroinitializer, [[TMP2]]
24-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
25-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer
2624
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2725
; CHECK: vector.body:
2826
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2927
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3028
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], [[TMP1]]
3129
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
32-
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT2]]
33-
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT2]]
30+
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
31+
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT]]
3432
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[TMP3]]
3533
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
3634
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 4

0 commit comments

Comments
 (0)