Skip to content

Commit 43e62e8

Browse files
committed
[LV] Strengthen loop-invariance checks in isPredicatedInst
Check loop-invariance against SCEV as well.
1 parent 1859b3c commit 43e62e8

File tree

4 files changed

+9
-146
lines changed

4 files changed

+9
-146
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2764,8 +2764,8 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
27642764

27652765
bool LoopAccessInfo::isInvariant(Value *V) const {
27662766
auto *SE = PSE->getSE();
2767-
// TODO: Is this really what we want? Even without FP SCEV, we may want some
2768-
// trivially loop-invariant FP values to be considered invariant.
2767+
if (TheLoop->isLoopInvariant(V))
2768+
return true;
27692769
if (!SE->isSCEVable(V->getType()))
27702770
return false;
27712771
const SCEV *S = SE->getSCEV(V);

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3101,14 +3101,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
31013101
// is correct. The easiest form of the latter is to require that all values
31023102
// stored are the same.
31033103
return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
3104-
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
3104+
Legal->isInvariant(cast<StoreInst>(I)->getValueOperand()));
31053105
}
31063106
case Instruction::UDiv:
31073107
case Instruction::SDiv:
31083108
case Instruction::SRem:
31093109
case Instruction::URem:
31103110
// If the divisor is loop-invariant no predication is needed.
3111-
return !TheLoop->isLoopInvariant(I->getOperand(1));
3111+
return !Legal->isInvariant(I->getOperand(1));
31123112
}
31133113
}
31143114

llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll

Lines changed: 2 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -17,126 +17,16 @@ define void @test(ptr %p, i64 %a, i8 %b) {
1717
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32>
1818
; CHECK-NEXT: br label [[FOR_COND:%.*]]
1919
; CHECK: vector.body:
20-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
21-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
20+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND]] ]
21+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
2222
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 9)
2323
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <16 x i32> [[VEC_IND]], splat (i32 2)
2424
; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i1> [[TMP4]], <16 x i1> zeroinitializer
2525
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
2626
; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i32> [[PREDPHI]], splat (i32 8)
2727
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>
28-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 0
29-
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]]
30-
; CHECK: pred.store.if:
31-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[TMP8]], i32 0
32-
; CHECK-NEXT: store i8 [[TMP19]], ptr [[P]], align 1
33-
; CHECK-NEXT: br label [[VECTOR_BODY]]
34-
; CHECK: pred.store.continue:
35-
; CHECK-NEXT: [[CMP_N:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 1
36-
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
37-
; CHECK: pred.store.if3:
38-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[TMP8]], i32 1
39-
; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
40-
; CHECK-NEXT: br label [[SCALAR_PH]]
41-
; CHECK: pred.store.continue4:
42-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 2
43-
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
44-
; CHECK: pred.store.if5:
45-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[TMP8]], i32 2
46-
; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
47-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
48-
; CHECK: pred.store.continue6:
49-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 3
50-
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
51-
; CHECK: pred.store.if7:
52-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[TMP8]], i32 3
53-
; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1
54-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE9]]
55-
; CHECK: pred.store.continue8:
56-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 4
57-
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
58-
; CHECK: pred.store.if9:
59-
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[TMP8]], i32 4
60-
; CHECK-NEXT: store i8 [[TMP18]], ptr [[P]], align 1
61-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
62-
; CHECK: pred.store.continue10:
63-
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 5
64-
; CHECK-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
65-
; CHECK: pred.store.if11:
66-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i8> [[TMP8]], i32 5
67-
; CHECK-NEXT: store i8 [[TMP20]], ptr [[P]], align 1
68-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
69-
; CHECK: pred.store.continue12:
70-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 6
71-
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
72-
; CHECK: pred.store.if13:
73-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[TMP8]], i32 6
74-
; CHECK-NEXT: store i8 [[TMP22]], ptr [[P]], align 1
75-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
76-
; CHECK: pred.store.continue14:
77-
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 7
78-
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
79-
; CHECK: pred.store.if15:
80-
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i8> [[TMP8]], i32 7
81-
; CHECK-NEXT: store i8 [[TMP24]], ptr [[P]], align 1
82-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
83-
; CHECK: pred.store.continue16:
84-
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 8
85-
; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
86-
; CHECK: pred.store.if17:
87-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP8]], i32 8
88-
; CHECK-NEXT: store i8 [[TMP26]], ptr [[P]], align 1
89-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
90-
; CHECK: pred.store.continue18:
91-
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 9
92-
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
93-
; CHECK: pred.store.if19:
94-
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[TMP8]], i32 9
95-
; CHECK-NEXT: store i8 [[TMP28]], ptr [[P]], align 1
96-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
97-
; CHECK: pred.store.continue20:
98-
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 10
99-
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
100-
; CHECK: pred.store.if21:
101-
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[TMP8]], i32 10
102-
; CHECK-NEXT: store i8 [[TMP30]], ptr [[P]], align 1
103-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
104-
; CHECK: pred.store.continue22:
105-
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 11
106-
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
107-
; CHECK: pred.store.if23:
108-
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[TMP8]], i32 11
109-
; CHECK-NEXT: store i8 [[TMP32]], ptr [[P]], align 1
110-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
111-
; CHECK: pred.store.continue24:
112-
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 12
113-
; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
114-
; CHECK: pred.store.if25:
115-
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[TMP8]], i32 12
116-
; CHECK-NEXT: store i8 [[TMP34]], ptr [[P]], align 1
117-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
118-
; CHECK: pred.store.continue26:
119-
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 13
120-
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
121-
; CHECK: pred.store.if27:
122-
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[TMP8]], i32 13
123-
; CHECK-NEXT: store i8 [[TMP36]], ptr [[P]], align 1
124-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
125-
; CHECK: pred.store.continue28:
126-
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 14
127-
; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
128-
; CHECK: pred.store.if29:
129-
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[TMP8]], i32 14
130-
; CHECK-NEXT: store i8 [[TMP38]], ptr [[P]], align 1
131-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
132-
; CHECK: pred.store.continue30:
133-
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 15
134-
; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE8]]
135-
; CHECK: pred.store.if31:
13628
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
13729
; CHECK-NEXT: store i8 [[TMP40]], ptr [[P]], align 1
138-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
139-
; CHECK: pred.store.continue32:
14030
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16)
14131
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
14232
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]

llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll

Lines changed: 3 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -17,42 +17,16 @@ define void @loop_invariant_store(ptr %p, i64 %a, i8 %b) {
1717
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32>
1818
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1919
; CHECK: [[VECTOR_BODY]]:
20-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE8:.*]] ]
21-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE8]] ]
20+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
21+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
2222
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 8)
2323
; CHECK-NEXT: [[TMP5:%.*]] = icmp sge <4 x i32> [[VEC_IND]], splat (i32 2)
2424
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
2525
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
2626
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
2727
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
28-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
29-
; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
30-
; CHECK: [[PRED_STORE_IF]]:
31-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8> [[TMP8]], i32 0
32-
; CHECK-NEXT: store i8 [[TMP17]], ptr [[P]], align 1
33-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
34-
; CHECK: [[PRED_STORE_CONTINUE]]:
35-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
36-
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
37-
; CHECK: [[PRED_STORE_IF3]]:
38-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i8> [[TMP8]], i32 1
39-
; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
40-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
41-
; CHECK: [[PRED_STORE_CONTINUE4]]:
42-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
43-
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
44-
; CHECK: [[PRED_STORE_IF5]]:
45-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i8> [[TMP8]], i32 2
46-
; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
47-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
48-
; CHECK: [[PRED_STORE_CONTINUE6]]:
49-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
50-
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8]]
51-
; CHECK: [[PRED_STORE_IF7]]:
5228
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
5329
; CHECK-NEXT: store i8 [[TMP9]], ptr [[P]], align 1
54-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
55-
; CHECK: [[PRED_STORE_CONTINUE8]]:
5630
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5731
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
5832
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
@@ -138,8 +112,7 @@ define void @loop_invariant_srem(ptr %p, i64 %a, i8 %b) {
138112
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
139113
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
140114
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
141-
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP4]], <4 x i8> [[TMP8]], <4 x i8> splat (i8 1)
142-
; CHECK-NEXT: [[TMP11:%.*]] = srem <4 x i8> [[VEC_IND1]], [[TMP9]]
115+
; CHECK-NEXT: [[TMP11:%.*]] = srem <4 x i8> [[VEC_IND1]], [[TMP8]]
143116
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
144117
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
145118
; CHECK: [[PRED_STORE_IF]]:

0 commit comments

Comments
 (0)