@@ -311,7 +311,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
311
311
; CHECK: vector.ph:
312
312
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
313
313
; CHECK: vector.body:
314
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
314
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
315
315
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
316
316
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
317
317
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP0]]
@@ -321,30 +321,33 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
321
321
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
322
322
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[REVERSE]], <i32 3, i32 3>
323
323
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
324
- ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
325
- ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
324
+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP0]]
325
+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
326
+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 -1
327
+ ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
328
+ ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
329
+ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
330
+ ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
326
331
; CHECK: pred.store.if:
327
- ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP0]]
328
- ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
329
- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
330
- ; CHECK-NEXT: [[TMP10:%.*]] = shl nsw i32 [[TMP8]], 2
331
- ; CHECK-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4
332
+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
333
+ ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 0
334
+ ; CHECK-NEXT: [[TMP12:%.*]] = shl nsw i32 [[TMP11]], 2
335
+ ; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4
332
336
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
333
337
; CHECK: pred.store.continue:
334
- ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
335
- ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
336
- ; CHECK: pred.store.if1:
337
- ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], -1
338
- ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP12]]
339
- ; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
340
- ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP12]]
341
- ; CHECK-NEXT: [[TMP16:%.*]] = shl nsw i32 [[TMP14]], 2
342
- ; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4
343
- ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
344
- ; CHECK: pred.store.continue2:
338
+ ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
339
+ ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
340
+ ; CHECK: pred.store.if3:
341
+ ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -1
342
+ ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP14]]
343
+ ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 1
344
+ ; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i32 [[TMP16]], 2
345
+ ; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4
346
+ ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
347
+ ; CHECK: pred.store.continue4:
345
348
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
346
- ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
347
- ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
349
+ ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
350
+ ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
348
351
; CHECK: middle.block:
349
352
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
350
353
; CHECK: scalar.ph:
@@ -353,13 +356,13 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) {
353
356
; CHECK: for.body:
354
357
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
355
358
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[INDVARS_IV]]
356
- ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
357
- ; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP18]], 3
359
+ ; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
360
+ ; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP19]], 3
358
361
; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
359
362
; CHECK: if.then:
360
363
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[INDVARS_IV]]
361
- ; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
362
- ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP19]], 2
364
+ ; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
365
+ ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP20]], 2
363
366
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[INDVARS_IV]]
364
367
; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4
365
368
; CHECK-NEXT: br label [[FOR_INC]]
@@ -635,26 +638,26 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly
635
638
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i32> [[REVERSE]], <i32 3, i32 3>
636
639
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
637
640
; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[VEC_IND]], <i64 2, i64 2>
638
- ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
639
- ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
641
+ ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
642
+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP7]]
643
+ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
644
+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP9]]
645
+ ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4
646
+ ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
647
+ ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
648
+ ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
640
649
; CHECK: pred.store.if:
641
- ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
642
- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP8]]
643
- ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
644
- ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
645
- ; CHECK-NEXT: [[TMP12:%.*]] = shl nsw i32 [[TMP10]], 2
646
- ; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4
650
+ ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]]
651
+ ; CHECK-NEXT: [[TMP15:%.*]] = shl nsw i32 [[TMP11]], 2
652
+ ; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
647
653
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
648
654
; CHECK: pred.store.continue:
649
- ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
650
- ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
655
+ ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
656
+ ; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
651
657
; CHECK: pred.store.if1:
652
- ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -1
653
- ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
654
- ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP15]]
655
- ; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
656
- ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP14]]
657
- ; CHECK-NEXT: [[TMP19:%.*]] = shl nsw i32 [[TMP17]], 2
658
+ ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -1
659
+ ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP17]]
660
+ ; CHECK-NEXT: [[TMP19:%.*]] = shl nsw i32 [[TMP12]], 2
658
661
; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
659
662
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
660
663
; CHECK: pred.store.continue2:
0 commit comments