Skip to content

Commit 284b1dd

Browse files
committed
Rebase + fix
* Regenerated some new tests after rebase. * Fixed an issue where we were assuming that FirstActiveLane always returned a 64-bit value.
1 parent 693a926 commit 284b1dd

File tree

2 files changed

+14
-18
lines changed

2 files changed

+14
-18
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -772,10 +772,13 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
772772
DebugLoc DL = cast<VPInstruction>(Op)->getDebugLoc();
773773
VPValue *FirstActiveLane =
774774
B.createNaryOp(VPInstruction::FirstActiveLane, Mask, DL);
775-
if (CanonicalIVType != TypeInfo.inferScalarType(FirstActiveLane)) {
776-
Instruction::CastOps CastOp = CanonicalIVType->getScalarSizeInBits() < 64
777-
? Instruction::Trunc
778-
: Instruction::ZExt;
775+
Type *FirstActiveLaneType = TypeInfo.inferScalarType(FirstActiveLane);
776+
if (CanonicalIVType != FirstActiveLaneType) {
777+
Instruction::CastOps CastOp =
778+
CanonicalIVType->getScalarSizeInBits() <
779+
FirstActiveLaneType->getScalarSizeInBits()
780+
? Instruction::Trunc
781+
: Instruction::ZExt;
779782
FirstActiveLane =
780783
B.createScalarCast(CastOp, FirstActiveLane, CanonicalIVType, DL);
781784
}

llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn
5656
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
5757
; VF8UF2: [[VECTOR_BODY]]:
5858
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
59-
; VF8UF2-NEXT: [[TMP0:%.+]] = add i64 %index, 0
59+
; VF8UF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
6060
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
6161
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
6262
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
@@ -158,15 +158,13 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
158158
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
159159
; VF8UF1: [[VECTOR_BODY]]:
160160
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
161-
; VF8UF1-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
162161
; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
163162
; VF8UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
164163
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
165164
; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
166165
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
167166
; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
168167
; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
169-
; VF8UF1-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
170168
; VF8UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
171169
; VF8UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
172170
; VF8UF1: [[MIDDLE_SPLIT]]:
@@ -175,7 +173,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
175173
; VF8UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
176174
; VF8UF1: [[VECTOR_EARLY_EXIT]]:
177175
; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true)
178-
; VF8UF1-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <8 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
176+
; VF8UF1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
179177
; VF8UF1-NEXT: br label %[[EXIT]]
180178
; VF8UF1: [[SCALAR_PH]]:
181179
; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -191,7 +189,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
191189
; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
192190
; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
193191
; VF8UF1: [[EXIT]]:
194-
; VF8UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
192+
; VF8UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ]
195193
; VF8UF1-NEXT: ret i64 [[RES]]
196194
;
197195
; VF8UF2-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
@@ -202,8 +200,6 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
202200
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
203201
; VF8UF2: [[VECTOR_BODY]]:
204202
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
205-
; VF8UF2-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
206-
; VF8UF2-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
207203
; VF8UF2-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
208204
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
209205
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
@@ -212,7 +208,6 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
212208
; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
213209
; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
214210
; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
215-
; VF8UF2-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]], splat (i64 8)
216211
; VF8UF2-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
217212
; VF8UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
218213
; VF8UF2: [[MIDDLE_SPLIT]]:
@@ -221,7 +216,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
221216
; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
222217
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
223218
; VF8UF2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true)
224-
; VF8UF2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <8 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
219+
; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
225220
; VF8UF2-NEXT: br label %[[EXIT]]
226221
; VF8UF2: [[SCALAR_PH]]:
227222
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -237,7 +232,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
237232
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
238233
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
239234
; VF8UF2: [[EXIT]]:
240-
; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
235+
; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ]
241236
; VF8UF2-NEXT: ret i64 [[RES]]
242237
;
243238
; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
@@ -248,15 +243,13 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
248243
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
249244
; VF16UF1: [[VECTOR_BODY]]:
250245
; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
251-
; VF16UF1-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
252246
; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
253247
; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
254248
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
255249
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
256250
; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
257251
; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
258252
; VF16UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
259-
; VF16UF1-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
260253
; VF16UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
261254
; VF16UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
262255
; VF16UF1: [[MIDDLE_SPLIT]]:
@@ -265,7 +258,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
265258
; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
266259
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
267260
; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 true)
268-
; VF16UF1-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <16 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
261+
; VF16UF1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
269262
; VF16UF1-NEXT: br label %[[EXIT]]
270263
; VF16UF1: [[SCALAR_PH]]:
271264
; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -281,7 +274,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
281274
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
282275
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
283276
; VF16UF1: [[EXIT]]:
284-
; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
277+
; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ]
285278
; VF16UF1-NEXT: ret i64 [[RES]]
286279
;
287280
entry:

0 commit comments

Comments
 (0)