@@ -56,7 +56,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn
56
56
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
57
57
; VF8UF2: [[VECTOR_BODY]]:
58
58
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
59
- ; VF8UF2-NEXT: [[TMP0:%.+ ]] = add i64 %index , 0
59
+ ; VF8UF2-NEXT: [[TMP0:%.* ]] = add i64 [[INDEX]] , 0
60
60
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
61
61
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
62
62
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
@@ -158,15 +158,13 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
158
158
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
159
159
; VF8UF1: [[VECTOR_BODY]]:
160
160
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
161
- ; VF8UF1-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
162
161
; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
163
162
; VF8UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
164
163
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
165
164
; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
166
165
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
167
166
; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
168
167
; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
169
- ; VF8UF1-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
170
168
; VF8UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
171
169
; VF8UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
172
170
; VF8UF1: [[MIDDLE_SPLIT]]:
@@ -175,7 +173,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
175
173
; VF8UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
176
174
; VF8UF1: [[VECTOR_EARLY_EXIT]]:
177
175
; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true)
178
- ; VF8UF1-NEXT: [[EARLY_EXIT_VALUE :%.*]] = extractelement <8 x i64> [[VEC_IND ]], i64 [[FIRST_ACTIVE_LANE]]
176
+ ; VF8UF1-NEXT: [[TMP8 :%.*]] = add i64 [[INDEX ]], [[FIRST_ACTIVE_LANE]]
179
177
; VF8UF1-NEXT: br label %[[EXIT]]
180
178
; VF8UF1: [[SCALAR_PH]]:
181
179
; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -191,7 +189,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
191
189
; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
192
190
; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
193
191
; VF8UF1: [[EXIT]]:
194
- ; VF8UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE ]], %[[VECTOR_EARLY_EXIT]] ]
192
+ ; VF8UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8 ]], %[[VECTOR_EARLY_EXIT]] ]
195
193
; VF8UF1-NEXT: ret i64 [[RES]]
196
194
;
197
195
; VF8UF2-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
@@ -202,8 +200,6 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
202
200
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
203
201
; VF8UF2: [[VECTOR_BODY]]:
204
202
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
205
- ; VF8UF2-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
206
- ; VF8UF2-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
207
203
; VF8UF2-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
208
204
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
209
205
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
@@ -212,7 +208,6 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
212
208
; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
213
209
; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
214
210
; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
215
- ; VF8UF2-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]], splat (i64 8)
216
211
; VF8UF2-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
217
212
; VF8UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
218
213
; VF8UF2: [[MIDDLE_SPLIT]]:
@@ -221,7 +216,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
221
216
; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
222
217
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
223
218
; VF8UF2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true)
224
- ; VF8UF2-NEXT: [[EARLY_EXIT_VALUE :%.*]] = extractelement <8 x i64> [[VEC_IND ]], i64 [[FIRST_ACTIVE_LANE]]
219
+ ; VF8UF2-NEXT: [[TMP8 :%.*]] = add i64 [[INDEX ]], [[FIRST_ACTIVE_LANE]]
225
220
; VF8UF2-NEXT: br label %[[EXIT]]
226
221
; VF8UF2: [[SCALAR_PH]]:
227
222
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -237,7 +232,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
237
232
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
238
233
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
239
234
; VF8UF2: [[EXIT]]:
240
- ; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE ]], %[[VECTOR_EARLY_EXIT]] ]
235
+ ; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8 ]], %[[VECTOR_EARLY_EXIT]] ]
241
236
; VF8UF2-NEXT: ret i64 [[RES]]
242
237
;
243
238
; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
@@ -248,15 +243,13 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
248
243
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
249
244
; VF16UF1: [[VECTOR_BODY]]:
250
245
; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
251
- ; VF16UF1-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
252
246
; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
253
247
; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
254
248
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
255
249
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
256
250
; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
257
251
; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
258
252
; VF16UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
259
- ; VF16UF1-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
260
253
; VF16UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
261
254
; VF16UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
262
255
; VF16UF1: [[MIDDLE_SPLIT]]:
@@ -265,7 +258,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
265
258
; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
266
259
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
267
260
; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 true)
268
- ; VF16UF1-NEXT: [[EARLY_EXIT_VALUE :%.*]] = extractelement <16 x i64> [[VEC_IND ]], i64 [[FIRST_ACTIVE_LANE]]
261
+ ; VF16UF1-NEXT: [[TMP8 :%.*]] = add i64 [[INDEX ]], [[FIRST_ACTIVE_LANE]]
269
262
; VF16UF1-NEXT: br label %[[EXIT]]
270
263
; VF16UF1: [[SCALAR_PH]]:
271
264
; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -281,7 +274,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
281
274
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
282
275
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
283
276
; VF16UF1: [[EXIT]]:
284
- ; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE ]], %[[VECTOR_EARLY_EXIT]] ]
277
+ ; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8 ]], %[[VECTOR_EARLY_EXIT]] ]
285
278
; VF16UF1-NEXT: ret i64 [[RES]]
286
279
;
287
280
entry:
0 commit comments