@@ -213,7 +213,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
213
213
; CHECK: vector.ph:
214
214
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
215
215
; CHECK: vector.body:
216
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
216
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
217
217
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
218
218
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
219
219
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP0]]
@@ -223,32 +223,33 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
223
223
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
224
224
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[REVERSE]], <i32 3, i32 3>
225
225
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
226
- ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
227
- ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
226
+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP0]]
227
+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
228
+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 -1
229
+ ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
230
+ ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
231
+ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
232
+ ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
228
233
; CHECK: pred.store.if:
229
- ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP0]]
230
- ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
231
- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
232
- ; CHECK-NEXT: [[TMP10:%.*]] = shl nsw i32 [[TMP8]], 2
233
- ; CHECK-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4
234
+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
235
+ ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 0
236
+ ; CHECK-NEXT: [[TMP12:%.*]] = shl nsw i32 [[TMP11]], 2
237
+ ; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4
234
238
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
235
239
; CHECK: pred.store.continue:
236
- ; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_STORE_IF]] ]
237
- ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
238
- ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
239
- ; CHECK: pred.store.if1:
240
- ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], -1
241
- ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP13]]
242
- ; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
243
- ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP13]]
244
- ; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i32 [[TMP15]], 2
245
- ; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4
246
- ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
247
- ; CHECK: pred.store.continue2:
248
- ; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP15]], [[PRED_STORE_IF1]] ]
240
+ ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
241
+ ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
242
+ ; CHECK: pred.store.if3:
243
+ ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -1
244
+ ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP14]]
245
+ ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 1
246
+ ; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i32 [[TMP16]], 2
247
+ ; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4
248
+ ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
249
+ ; CHECK: pred.store.continue4:
249
250
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
250
- ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
251
- ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
251
+ ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
252
+ ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
252
253
; CHECK: middle.block:
253
254
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
254
255
; CHECK: scalar.ph:
@@ -257,13 +258,13 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
257
258
; CHECK: for.body:
258
259
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
259
260
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[INDVARS_IV]]
260
- ; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
261
- ; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP20]], 3
261
+ ; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
262
+ ; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP19]], 3
262
263
; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
263
264
; CHECK: if.then:
264
265
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[INDVARS_IV]]
265
- ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
266
- ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP21]], 2
266
+ ; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
267
+ ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP20]], 2
267
268
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[INDVARS_IV]]
268
269
; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4
269
270
; CHECK-NEXT: br label [[FOR_INC]]
@@ -541,35 +542,33 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
541
542
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[REVERSE]], <i32 3, i32 3>
542
543
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
543
544
; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[VEC_IND]], <i64 2, i64 2>
544
- ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
545
- ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
545
+ ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
546
+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP7]]
547
+ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
548
+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP9]]
549
+ ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4
550
+ ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
551
+ ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
552
+ ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
546
553
; CHECK: pred.store.if:
547
- ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
548
- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP8]]
549
- ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
550
- ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
551
- ; CHECK-NEXT: [[TMP12:%.*]] = shl nsw i32 [[TMP10]], 2
552
- ; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4
554
+ ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
555
+ ; CHECK-NEXT: [[TMP15:%.*]] = shl nsw i32 [[TMP11]], 2
556
+ ; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
553
557
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
554
558
; CHECK: pred.store.continue:
555
- ; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP10]], [[PRED_STORE_IF]] ]
556
- ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
557
- ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
559
+ ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
560
+ ; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
558
561
; CHECK: pred.store.if1:
559
- ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], -1
560
- ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
561
- ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP16]]
562
- ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
563
- ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP15]]
564
- ; CHECK-NEXT: [[TMP20:%.*]] = shl nsw i32 [[TMP18]], 2
565
- ; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4
562
+ ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -1
563
+ ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP17]]
564
+ ; CHECK-NEXT: [[TMP19:%.*]] = shl nsw i32 [[TMP12]], 2
565
+ ; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
566
566
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
567
567
; CHECK: pred.store.continue2:
568
- ; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP18]], [[PRED_STORE_IF1]] ]
569
568
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
570
569
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 -2, i64 -2>
571
- ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
572
- ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
570
+ ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
571
+ ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
573
572
; CHECK: middle.block:
574
573
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
575
574
; CHECK: scalar.ph:
@@ -578,14 +577,14 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
578
577
; CHECK: for.body:
579
578
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
580
579
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[INDVARS_IV]]
581
- ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
582
- ; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP23]], 3
580
+ ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
581
+ ; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP21]], 3
583
582
; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
584
583
; CHECK: if.then:
585
584
; CHECK-NEXT: [[INDVARS_IV_STRIDED:%.*]] = mul i64 [[INDVARS_IV]], 2
586
585
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[INDVARS_IV_STRIDED]]
587
- ; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
588
- ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP24]], 2
586
+ ; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
587
+ ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP22]], 2
589
588
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[INDVARS_IV]]
590
589
; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4
591
590
; CHECK-NEXT: br label [[FOR_INC]]
0 commit comments