Skip to content

Commit 5bab59d

Browse files
committed
[SLP] Try to vectorize scalars that have already been vectorized but do not need to be scheduled.
If a scalar does not need to be scheduled and has already been vectorized in one of the vector nodes, we can still try to vectorize it in another node. We just do not need to account for its cost in the total scalar cost, as it will be handled in the main vectorized node. Differential Revision: https://reviews.llvm.org/D159205
1 parent 0a29827 commit 5bab59d

File tree

2 files changed

+26
-18
lines changed

2 files changed

+26
-18
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2911,8 +2911,11 @@ class BoUpSLP {
29112911
}
29122912
if (Last->State != TreeEntry::NeedToGather) {
29132913
for (Value *V : VL) {
2914-
[[maybe_unused]] const TreeEntry *TE = getTreeEntry(V);
2915-
assert((!TE || TE == Last) && "Scalar already in tree!");
2914+
const TreeEntry *TE = getTreeEntry(V);
2915+
assert((!TE || TE == Last || doesNotNeedToBeScheduled(V)) &&
2916+
"Scalar already in tree!");
2917+
if (TE)
2918+
continue;
29162919
ScalarToTreeEntry[V] = Last;
29172920
}
29182921
// Update the scheduler bundle to point to this TreeEntry.
@@ -5813,7 +5816,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
58135816

58145817
// Check that none of the instructions in the bundle are already in the tree.
58155818
for (Value *V : VL) {
5816-
if (!IsScatterVectorizeUserTE && !isa<Instruction>(V))
5819+
if ((!IsScatterVectorizeUserTE && !isa<Instruction>(V)) ||
5820+
doesNotNeedToBeScheduled(V))
58175821
continue;
58185822
if (getTreeEntry(V)) {
58195823
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
@@ -7560,6 +7564,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
75607564
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
75617565
SetVector<Value *> UniqueValues(VL.begin(), VL.end());
75627566
const unsigned Sz = UniqueValues.size();
7567+
SmallBitVector UsedScalars(Sz, false);
7568+
for (unsigned I = 0; I < Sz; ++I) {
7569+
if (getTreeEntry(UniqueValues[I]) == E)
7570+
continue;
7571+
UsedScalars.set(I);
7572+
}
75637573
auto GetCostDiff =
75647574
[=](function_ref<InstructionCost(unsigned)> ScalarEltCost,
75657575
function_ref<InstructionCost(InstructionCost)> VectorCost) {
@@ -7569,10 +7579,13 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
75697579
// For some of the instructions no need to calculate cost for each
75707580
// particular instruction, we can use the cost of the single
75717581
// instruction x total number of scalar instructions.
7572-
ScalarCost = Sz * ScalarEltCost(0);
7582+
ScalarCost = (Sz - UsedScalars.count()) * ScalarEltCost(0);
75737583
} else {
7574-
for (unsigned I = 0; I < Sz; ++I)
7584+
for (unsigned I = 0; I < Sz; ++I) {
7585+
if (UsedScalars.test(I))
7586+
continue;
75757587
ScalarCost += ScalarEltCost(I);
7588+
}
75767589
}
75777590

75787591
InstructionCost VecCost = VectorCost(CommonCost);

llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,18 @@ define void @test(i1 %c, ptr %arg) {
66
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
77
; CHECK: if:
88
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG:%.*]], i32 0
9-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer
10-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <4 x ptr> [[SHUFFLE]], <4 x i64> <i64 32, i64 24, i64 8, i64 0>
11-
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
9+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <4 x ptr> [[TMP2]], <4 x i64> <i64 32, i64 24, i64 8, i64 0>
11+
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
1212
; CHECK-NEXT: br label [[JOIN:%.*]]
1313
; CHECK: else:
14-
; CHECK-NEXT: [[ARG_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 8
15-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[ARG]], i32 0
16-
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x ptr> [[TMP4]], <2 x ptr> poison, <2 x i32> zeroinitializer
17-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <2 x ptr> [[SHUFFLE1]], <2 x i64> <i64 32, i64 24>
18-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3
19-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
20-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 poison, i32 3>
21-
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x ptr> [[TMP8]], ptr [[ARG_1]], i32 2
22-
; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP9]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
14+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 0
15+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <4 x i32> zeroinitializer
16+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, <4 x ptr> [[TMP6]], <4 x i64> <i64 32, i64 24, i64 8, i64 0>
17+
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP7]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> poison)
2318
; CHECK-NEXT: br label [[JOIN]]
2419
; CHECK: join:
25-
; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP10]], [[ELSE]] ]
20+
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ [[TMP4]], [[IF]] ], [ [[TMP8]], [[ELSE]] ]
2621
; CHECK-NEXT: ret void
2722
;
2823
br i1 %c, label %if, label %else

0 commit comments

Comments
 (0)