Skip to content

Commit bb937e2

Browse files
authored
[LV] Compute value of escaped induction based on the computed end value. (#110576)
Update fixupIVUsers to compute the value for escaped inductions using the already computed end value of the induction (EndValue), but subtracting the step. This results in slightly simpler codegen, as we avoid computing the full transformed index at VectorTripCount - 1. PR: #110576
1 parent 747d8f3 commit bb937e2

File tree

6 files changed

+42
-42
lines changed

6 files changed

+42
-42
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2747,17 +2747,24 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
27472747
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
27482748
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
27492749

2750-
Value *CountMinusOne = B.CreateSub(
2751-
VectorTripCount, ConstantInt::get(VectorTripCount->getType(), 1));
2752-
CountMinusOne->setName("cmo");
2753-
27542750
VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
27552751
assert(StepVPV && "step must have been expanded during VPlan execution");
27562752
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
27572753
: State.get(StepVPV, VPLane(0));
2758-
Value *Escape =
2759-
emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step,
2760-
II.getKind(), II.getInductionBinOp());
2754+
Value *Escape = nullptr;
2755+
if (EndValue->getType()->isIntegerTy())
2756+
Escape = B.CreateSub(EndValue, Step);
2757+
else if (EndValue->getType()->isPointerTy())
2758+
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
2759+
else if (EndValue->getType()->isFloatingPointTy()) {
2760+
Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
2761+
Instruction::FAdd
2762+
? Instruction::FSub
2763+
: Instruction::FAdd,
2764+
EndValue, Step);
2765+
} else {
2766+
llvm_unreachable("all possible induction types must be handled");
2767+
}
27612768
Escape->setName("ind.escape");
27622769
MissingVals[UI] = Escape;
27632770
}

llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
4242
; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4343
; CHECK: middle.block:
4444
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
45-
; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1
46-
; CHECK-NEXT: [[TMP37:%.*]] = mul i64 [[CMO]], 8
47-
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP37]]
45+
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[IND_END]], i64 -8
4846
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4947
; CHECK: scalar.ph:
5048
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ]

llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -208,25 +208,23 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
208208
; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
209209
; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x double> [ <double 0.000000e+00, double 3.000000e+00, double 6.000000e+00, double 9.000000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
210210
; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.200000e+01, double 1.200000e+01, double 1.200000e+01, double 1.200000e+01>
211-
; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 2.400000e+01, double 2.400000e+01, double 2.400000e+01, double 2.400000e+01>
212-
; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 3.600000e+01, double 3.600000e+01, double 3.600000e+01, double 3.600000e+01>
211+
; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 2.400000e+01, double 2.400000e+01, double 2.400000e+01, double 2.400000e+01>
212+
; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 3.600000e+01, double 3.600000e+01, double 3.600000e+01, double 3.600000e+01>
213213
; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]]
214214
; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32
215215
; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 64
216216
; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP1]], i64 96
217217
; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], ptr [[TMP1]], align 8
218218
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], ptr [[TMP2]], align 8
219-
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2]], ptr [[TMP3]], align 8
220-
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3]], ptr [[TMP4]], align 8
219+
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], ptr [[TMP3]], align 8
220+
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_3]], ptr [[TMP4]], align 8
221221
; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
222222
; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], <double 4.800000e+01, double 4.800000e+01, double 4.800000e+01, double 4.800000e+01>
223223
; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
224224
; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
225225
; AUTO_VEC: middle.block:
226226
; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
227-
; AUTO_VEC-NEXT: [[CMO:%.*]] = add nsw i64 [[N_VEC]], -1
228-
; AUTO_VEC-NEXT: [[DOTCAST6:%.*]] = sitofp i64 [[CMO]] to double
229-
; AUTO_VEC-NEXT: [[TMP6:%.*]] = fmul fast double [[DOTCAST6]], 3.000000e+00
227+
; AUTO_VEC-NEXT: [[IND_ESCAPE:%.*]] = fadd fast double [[TMP0]], -3.000000e+00
230228
; AUTO_VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]]
231229
; AUTO_VEC: for.body:
232230
; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
@@ -238,7 +236,7 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
238236
; AUTO_VEC-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[SMAX]]
239237
; AUTO_VEC-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
240238
; AUTO_VEC: for.end:
241-
; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ]
239+
; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ]
242240
; AUTO_VEC-NEXT: ret double [[J_LCSSA]]
243241
;
244242
entry:

llvm/test/Transforms/LoopVectorize/iv_outside_user.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ for.end:
6363

6464
; CHECK-LABEL: @geppre
6565
; CHECK-LABEL: middle.block:
66-
; CHECK: %ind.escape = getelementptr i8, ptr %ptr, i64 496
66+
; CHECK: %ind.escape = getelementptr i8, ptr %ind.end, i64 -16
6767
; CHECK-LABEL: for.end:
6868
; CHECK: %[[RET:.*]] = phi ptr [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ]
6969
; CHECK: ret ptr %[[RET]]
@@ -85,9 +85,7 @@ for.end:
8585

8686
; CHECK-LABEL: @both
8787
; CHECK-LABEL: middle.block:
88-
; CHECK: %[[END:.*]] = sub i64 %n.vec, 1
89-
; CHECK: %[[END_OFFSET:.*]] = mul i64 %[[END]], 4
90-
; CHECK: %ind.escape = getelementptr i8, ptr %base, i64 %[[END_OFFSET]]
88+
; CHECK: %ind.escape = getelementptr i8, ptr %ind.end1, i64 -4
9189
; CHECK-LABEL: for.end:
9290
; CHECK: %[[RET:.*]] = phi ptr [ %inc.lag1, %for.body ], [ %ind.escape, %middle.block ]
9391
; CHECK: ret ptr %[[RET]]
@@ -142,7 +140,7 @@ for.end:
142140
; CHECK: %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]]
143141
; CHECK: middle.block
144142
; CHECK: %[[CMP:.+]] = icmp eq i32 %[[T5]], %[[N_VEC]]
145-
; CHECK: %ind.escape = add i32 %[[T15]],
143+
; CHECK: %ind.escape = sub i32 %ind.end8, -8
146144
; CHECK: br i1 %[[CMP]], label %BB3, label %scalar.ph
147145
define void @PR30742() {
148146
BB0:

llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,7 @@ define i32 @test(ptr %arr, i64 %n) {
5151
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
5252
; CHECK: middle.block:
5353
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
54-
; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1
55-
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = add i64 1, [[CMO]]
54+
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[IND_END]], 1
5655
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOAD_VAL:%.*]], label [[SCALAR_PH]]
5756
; CHECK: scalar.ph:
5857
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ], [ 1, [[VECTOR_SCEVCHECK]] ]

llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ define void @test1_pr58811() {
2828
; CHECK: vector.body:
2929
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3030
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
31-
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
32-
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
31+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
32+
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3333
; CHECK: middle.block:
34-
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA]]
34+
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END]], [[INDUCTION_IV_LCSSA]]
3535
; CHECK-NEXT: br i1 false, label [[LOOP_3_PREHEADER:%.*]], label [[SCALAR_PH]]
3636
; CHECK: scalar.ph:
3737
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ]
@@ -123,28 +123,28 @@ define void @test2_pr58811() {
123123
; CHECK: vector.body:
124124
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
125125
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
126-
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
127-
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
126+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
127+
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
128128
; CHECK: middle.block:
129-
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA]]
129+
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END]], [[INDUCTION_IV_LCSSA]]
130130
; CHECK-NEXT: br i1 false, label [[LOOP_4_PREHEADER:%.*]], label [[SCALAR_PH]]
131131
; CHECK: scalar.ph:
132132
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ]
133133
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ]
134134
; CHECK-NEXT: br label [[LOOP_3:%.*]]
135135
; CHECK: loop.3:
136-
; CHECK-NEXT: [[INT16_TINDARRAYSAFEVAR_186_0747_1:%.*]] = phi i16 [ [[INC_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
137-
; CHECK-NEXT: [[UINT32_TVAR_177_2745_1:%.*]] = phi i32 [ [[SUB93_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
138-
; CHECK-NEXT: [[SUB93_1]] = sub i32 [[UINT32_TVAR_177_2745_1]], [[IV_2_LCSSA]]
139-
; CHECK-NEXT: [[INC_1]] = add i16 [[INT16_TINDARRAYSAFEVAR_186_0747_1]], 1
140-
; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[INT16_TINDARRAYSAFEVAR_186_0747_1]], 198
136+
; CHECK-NEXT: [[IV_4:%.*]] = phi i16 [ [[INC_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
137+
; CHECK-NEXT: [[IV_5:%.*]] = phi i32 [ [[SUB93_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
138+
; CHECK-NEXT: [[SUB93_1]] = sub i32 [[IV_5]], [[IV_2_LCSSA]]
139+
; CHECK-NEXT: [[INC_1]] = add i16 [[IV_4]], 1
140+
; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[IV_4]], 198
141141
; CHECK-NEXT: br i1 [[CMP88_1]], label [[LOOP_3]], label [[LOOP_4_PREHEADER]], !llvm.loop [[LOOP5:![0-9]+]]
142142
; CHECK: loop.4.preheader:
143-
; CHECK-NEXT: [[UINT32_TVAR_177_2745_1_LCSSA:%.*]] = phi i32 [ [[UINT32_TVAR_177_2745_1]], [[LOOP_3]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ]
143+
; CHECK-NEXT: [[IV_5_LCSSA:%.*]] = phi i32 [ [[IV_5]], [[LOOP_3]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ]
144144
; CHECK-NEXT: br label [[LOOP_4]]
145145
; CHECK: loop.4:
146-
; CHECK-NEXT: [[UINT32_TVAR_177_2745_2:%.*]] = phi i32 [ [[SUB93_2]], [[LOOP_4]] ], [ 0, [[LOOP_4_PREHEADER]] ]
147-
; CHECK-NEXT: [[SUB93_2]] = sub i32 [[UINT32_TVAR_177_2745_2]], [[UINT32_TVAR_177_2745_1_LCSSA]]
146+
; CHECK-NEXT: [[IV_6:%.*]] = phi i32 [ [[SUB93_2]], [[LOOP_4]] ], [ 0, [[LOOP_4_PREHEADER]] ]
147+
; CHECK-NEXT: [[SUB93_2]] = sub i32 [[IV_6]], [[IV_5_LCSSA]]
148148
; CHECK-NEXT: br i1 false, label [[LOOP_4]], label [[LOOP_1_HEADER_LOOPEXIT]]
149149
;
150150
entry:
@@ -201,10 +201,10 @@ define void @test3_pr58811() {
201201
; CHECK: vector.body:
202202
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
203203
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
204-
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
205-
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
204+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
205+
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
206206
; CHECK: middle.block:
207-
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[TMP3]]
207+
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END]], [[TMP3]]
208208
; CHECK-NEXT: br i1 false, label [[LOOP_4_PREHEADER:%.*]], label [[SCALAR_PH]]
209209
; CHECK: scalar.ph:
210210
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ]

0 commit comments

Comments
 (0)