Commit a04f615

[LV] Check for innermost loop instead of EnableVPlanNativePath in CM.

Replace EnableVPlanNativePath checks in the cost model with assertions that the code is only called for innermost loops. This ensures the cost model isn't used in the VPlan-native path, which is used only for outer-loop vectorization.

Even with EnableVPlanNativePath, inner loops are processed by the inner-loop vectorization path, not the native path, so checking EnableVPlanNativePath could affect cost-model decisions for inner loops and cause crashes, as in the attached test case.
1 parent f4ed7f8 commit a04f615
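The pattern, condensed from the diff below, is applied to each of the five affected cost-model queries: a conservative early return guarded by the flag is replaced with an assertion on the loop itself.

    // Before: taken even for inner loops whenever the flag is set.
    if (EnableVPlanNativePath)
      return false;

    // After: the cost model must only ever be queried for innermost loops.
    assert(
        TheLoop->isInnermost() &&
        "cost-model should not be used for outer loops (in VPlan-native path)");

(In getWideningDecision, the early return was `return CM_GatherScatter;` rather than `return false;`.)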

2 files changed: +98, -19 lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (12 additions & 19 deletions)

@@ -1298,11 +1298,9 @@ class LoopVectorizationCostModel {
   bool isProfitableToScalarize(Instruction *I, ElementCount VF) const {
     assert(VF.isVector() &&
            "Profitable to scalarize relevant only for VF > 1.");
-
-    // Cost model is not run in the VPlan-native path - return conservative
-    // result until this changes.
-    if (EnableVPlanNativePath)
-      return false;
+    assert(
+        TheLoop->isInnermost() &&
+        "cost-model should not be used for outer loops (in VPlan-native path)");
 
     auto Scalars = InstsToScalarize.find(VF);
     assert(Scalars != InstsToScalarize.end() &&
@@ -1312,6 +1310,9 @@ class LoopVectorizationCostModel {
 
   /// Returns true if \p I is known to be uniform after vectorization.
   bool isUniformAfterVectorization(Instruction *I, ElementCount VF) const {
+    assert(
+        TheLoop->isInnermost() &&
+        "cost-model should not be used for outer loops (in VPlan-native path)");
     // Pseudo probe needs to be duplicated for each unrolled iteration and
     // vector lane so that profiled loop trip count can be accurately
     // accumulated instead of being under counted.
@@ -1321,11 +1322,6 @@ class LoopVectorizationCostModel {
     if (VF.isScalar())
       return true;
 
-    // Cost model is not run in the VPlan-native path - return conservative
-    // result until this changes.
-    if (EnableVPlanNativePath)
-      return false;
-
     auto UniformsPerVF = Uniforms.find(VF);
     assert(UniformsPerVF != Uniforms.end() &&
            "VF not yet analyzed for uniformity");
@@ -1334,14 +1330,12 @@ class LoopVectorizationCostModel {
 
   /// Returns true if \p I is known to be scalar after vectorization.
   bool isScalarAfterVectorization(Instruction *I, ElementCount VF) const {
+    assert(
+        TheLoop->isInnermost() &&
+        "cost-model should not be used for outer loops (in VPlan-native path)");
     if (VF.isScalar())
       return true;
 
-    // Cost model is not run in the VPlan-native path - return conservative
-    // result until this changes.
-    if (EnableVPlanNativePath)
-      return false;
-
     auto ScalarsPerVF = Scalars.find(VF);
     assert(ScalarsPerVF != Scalars.end() &&
            "Scalar values are not calculated for VF");
@@ -1399,10 +1393,9 @@ class LoopVectorizationCostModel {
   /// through the cost modeling.
   InstWidening getWideningDecision(Instruction *I, ElementCount VF) const {
     assert(VF.isVector() && "Expected VF to be a vector VF");
-    // Cost model is not run in the VPlan-native path - return conservative
-    // result until this changes.
-    if (EnableVPlanNativePath)
-      return CM_GatherScatter;
+    assert(
+        TheLoop->isInnermost() &&
+        "cost-model should not be used for outer loops (in VPlan-native path)");
 
     std::pair<Instruction *, ElementCount> InstOnVF = std::make_pair(I, VF);
     auto Itr = WideningDecisions.find(InstOnVF);
New test file (86 additions & 0 deletions)

@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -p loop-vectorize -enable-vplan-native-path -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
+; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; -enable-vplan-native-path should not impact codegen for inner loops.
+
+define void @test(ptr %A) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = shl nsw i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = shl nsw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nsw i64 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = shl nsw i64 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP12]], align 4
+; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP13:%.*]] = add <4 x i32> [[STRIDED_VEC]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i32> [[TMP13]], i32 0
+; CHECK-NEXT:    store i32 [[TMP14]], ptr [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i32 1
+; CHECK-NEXT:    store i32 [[TMP15]], ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i32> [[TMP13]], i32 2
+; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3
+; CHECK-NEXT:    store i32 [[TMP17]], ptr [[TMP11]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
+; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL]]
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[L]], 2
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[GEP]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %mul = shl nsw i64 %iv, 1
+  %gep = getelementptr inbounds i32, ptr %A, i64 %mul
+  %l = load i32, ptr %gep, align 4
+  %add = add i32 %l, 2
+  store i32 %add, ptr %gep
+  %iv.next = add nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, 100
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
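To exercise the RUN lines by hand, substitute the test file's path for lit's %s placeholder; a sketch, assuming an opt binary built with assertions and a local copy of the test saved as inner-loop.ll (a stand-in name, since this extract omits the new file's path):

    opt -p loop-vectorize -enable-vplan-native-path -force-vector-width=4 \
        -force-vector-interleave=1 -S inner-loop.ll | FileCheck inner-loop.ll

Both RUN lines share the same CHECK lines and so are expected to produce matching output, confirming that -enable-vplan-native-path no longer changes codegen for this inner loop.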
