@@ -1509,7 +1509,8 @@ outer.exit:
1509
1509
ret void
1510
1510
}
1511
1511
1512
- ; TODO: STRIDE_CHECK can be eliminated via loop guards.
1512
+ ; The stride for the access in the inner loop is known to be non-negative via
1513
+ ; loop guards.
1513
1514
define void @stride_check_known_via_loop_guard(ptr %C, ptr %A, i32 %Acols) {
1514
1515
; CHECK-LABEL: define void @stride_check_known_via_loop_guard
1515
1516
; CHECK-SAME: (ptr [[C:%.*]], ptr [[A:%.*]], i32 [[ACOLS:%.*]]) {
@@ -1518,8 +1519,6 @@ define void @stride_check_known_via_loop_guard(ptr %C, ptr %A, i32 %Acols) {
1518
1519
; CHECK-NEXT: br i1 [[PRE_C]], label [[EXIT:%.*]], label [[OUTER_HEADER_PREHEADER:%.*]]
1519
1520
; CHECK: outer.header.preheader:
1520
1521
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 8
1521
- ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[ACOLS]] to i64
1522
- ; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 3
1523
1522
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[C]], i64 34359738368
1524
1523
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
1525
1524
; CHECK: outer.header:
@@ -1533,23 +1532,21 @@ define void @stride_check_known_via_loop_guard(ptr %C, ptr %A, i32 %Acols) {
1533
1532
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
1534
1533
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[C]], [[SCEVGEP]]
1535
1534
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1536
- ; CHECK-NEXT: [[STRIDE_CHECK:%.*]] = icmp slt i64 [[TMP1]], 0
1537
- ; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[FOUND_CONFLICT]], [[STRIDE_CHECK]]
1538
- ; CHECK-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1535
+ ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1539
1536
; CHECK: vector.ph:
1540
1537
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1541
1538
; CHECK: vector.body:
1542
1539
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1543
- ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
1544
- ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[TMP3]]
1545
- ; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8, !alias.scope [[META69:![0-9]+]], !noalias [[META72:![0-9]+]]
1546
- ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP5]], i64 0
1540
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
1541
+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[TMP0]]
1542
+ ; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8, !alias.scope [[META69:![0-9]+]], !noalias [[META72:![0-9]+]]
1543
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP2]], i64 0
1547
1544
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
1548
- ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i32 0
1549
- ; CHECK-NEXT: store <4 x double> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8, !alias.scope [[META72]]
1545
+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
1546
+ ; CHECK-NEXT: store <4 x double> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8, !alias.scope [[META72]]
1550
1547
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1551
- ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
1552
- ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP74:![0-9]+]]
1548
+ ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
1549
+ ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP74:![0-9]+]]
1553
1550
; CHECK: middle.block:
1554
1551
; CHECK-NEXT: br i1 true, label [[OUTER_LATCH]], label [[SCALAR_PH]]
1555
1552
; CHECK: scalar.ph:
0 commit comments