Skip to content

Commit 45aa3b5

Browse files
committed
Rebase + fix
* Regenerated some new tests after rebase. * Fixed an issue where we were assuming that FirstActiveLane always returned a 64-bit value.
1 parent 76aed7d commit 45aa3b5

File tree

2 files changed

+13
-17
lines changed

2 files changed

+13
-17
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -768,10 +768,13 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
768768
DebugLoc DL = cast<VPInstruction>(Op)->getDebugLoc();
769769
VPValue *FirstActiveLane =
770770
B.createNaryOp(VPInstruction::FirstActiveLane, Mask, DL);
771-
if (CanonicalIVType != TypeInfo.inferScalarType(FirstActiveLane)) {
772-
Instruction::CastOps CastOp = CanonicalIVType->getScalarSizeInBits() < 64
773-
? Instruction::Trunc
774-
: Instruction::ZExt;
771+
Type *FirstActiveLaneType = TypeInfo.inferScalarType(FirstActiveLane);
772+
if (CanonicalIVType != FirstActiveLaneType) {
773+
Instruction::CastOps CastOp =
774+
CanonicalIVType->getScalarSizeInBits() <
775+
FirstActiveLaneType->getScalarSizeInBits()
776+
? Instruction::Trunc
777+
: Instruction::ZExt;
775778
FirstActiveLane =
776779
B.createScalarCast(CastOp, FirstActiveLane, CanonicalIVType, DL);
777780
}

llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,6 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
165165
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
166166
; VF8UF1: [[VECTOR_BODY]]:
167167
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
168-
; VF8UF1-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
169168
; VF8UF1-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
170169
; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
171170
; VF8UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
@@ -174,7 +173,6 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
174173
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
175174
; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
176175
; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
177-
; VF8UF1-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
178176
; VF8UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
179177
; VF8UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
180178
; VF8UF1: [[MIDDLE_SPLIT]]:
@@ -183,7 +181,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
183181
; VF8UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
184182
; VF8UF1: [[VECTOR_EARLY_EXIT]]:
185183
; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true)
186-
; VF8UF1-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <8 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
184+
; VF8UF1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
187185
; VF8UF1-NEXT: br label %[[EXIT]]
188186
; VF8UF1: [[SCALAR_PH]]:
189187
; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -199,7 +197,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
199197
; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
200198
; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
201199
; VF8UF1: [[EXIT]]:
202-
; VF8UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
200+
; VF8UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ]
203201
; VF8UF1-NEXT: ret i64 [[RES]]
204202
;
205203
; VF8UF2-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
@@ -211,8 +209,6 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
211209
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
212210
; VF8UF2: [[VECTOR_BODY]]:
213211
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
214-
; VF8UF2-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
215-
; VF8UF2-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
216212
; VF8UF2-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
217213
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
218214
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
@@ -221,7 +217,6 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
221217
; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
222218
; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
223219
; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
224-
; VF8UF2-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]], splat (i64 8)
225220
; VF8UF2-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
226221
; VF8UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
227222
; VF8UF2: [[MIDDLE_SPLIT]]:
@@ -230,7 +225,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
230225
; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
231226
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
232227
; VF8UF2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true)
233-
; VF8UF2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <8 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
228+
; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
234229
; VF8UF2-NEXT: br label %[[EXIT]]
235230
; VF8UF2: [[SCALAR_PH]]:
236231
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -246,7 +241,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
246241
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
247242
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
248243
; VF8UF2: [[EXIT]]:
249-
; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
244+
; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ]
250245
; VF8UF2-NEXT: ret i64 [[RES]]
251246
;
252247
; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
@@ -258,7 +253,6 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
258253
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
259254
; VF16UF1: [[VECTOR_BODY]]:
260255
; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
261-
; VF16UF1-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
262256
; VF16UF1-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
263257
; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
264258
; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
@@ -267,7 +261,6 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
267261
; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
268262
; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
269263
; VF16UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
270-
; VF16UF1-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
271264
; VF16UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
272265
; VF16UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
273266
; VF16UF1: [[MIDDLE_SPLIT]]:
@@ -276,7 +269,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
276269
; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
277270
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
278271
; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 true)
279-
; VF16UF1-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <16 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
272+
; VF16UF1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
280273
; VF16UF1-NEXT: br label %[[EXIT]]
281274
; VF16UF1: [[SCALAR_PH]]:
282275
; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -292,7 +285,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
292285
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
293286
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
294287
; VF16UF1: [[EXIT]]:
295-
; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
288+
; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ]
296289
; VF16UF1-NEXT: ret i64 [[RES]]
297290
;
298291
entry:

0 commit comments

Comments
 (0)