Skip to content

Commit ac01ae7

Browse files
committed
[SLP]Use ShuffleInstructionBuilder for vector shrinking.
We can now use ShuffleInstructionBuilder to emit the shrinking shuffle. This allows removing an extra shuffle from the emitted code and reusing the original vector. Part of D110978. Differential Revision: https://reviews.llvm.org/D140499
1 parent 498704d commit ac01ae7

File tree

3 files changed

+26
-27
lines changed

3 files changed

+26
-27
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8663,6 +8663,11 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
86638663
return TE->isOperandGatherNode({E, NodeIdx}) &&
86648664
VE->isSame(TE->Scalars);
86658665
}))) {
8666+
auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
8667+
ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
8668+
ShuffleBuilder.add(V, Mask);
8669+
return ShuffleBuilder.finalize(std::nullopt);
8670+
};
86668671
Value *V = vectorizeTree(VE);
86678672
if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
86688673
if (!VE->ReuseShuffleIndices.empty()) {
@@ -8696,18 +8701,14 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
86968701
assert(VF >= UsedIdxs.size() && "Expected vectorization factor "
86978702
"less than original vector size.");
86988703
UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem);
8699-
V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle");
8704+
V = FinalShuffle(V, UniqueIdxs);
87008705
} else {
87018706
assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
87028707
"Expected vectorization factor less "
87038708
"than original vector size.");
87048709
SmallVector<int> UniformMask(VF, 0);
87058710
std::iota(UniformMask.begin(), UniformMask.end(), 0);
8706-
V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
8707-
}
8708-
if (auto *I = dyn_cast<Instruction>(V)) {
8709-
GatherShuffleExtractSeq.insert(I);
8710-
CSEBlocks.insert(I->getParent());
8711+
V = FinalShuffle(V, UniformMask);
87118712
}
87128713
}
87138714
return V;

llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@ define void @wombat(ptr %ptr, ptr %ptr1) {
55
; CHECK-LABEL: @wombat(
66
; CHECK-NEXT: bb:
77
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[PTR1:%.*]], i32 3
8-
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 8
9-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
10-
; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
11-
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[SHRINK_SHUFFLE]], <i32 -1, i32 -1>
12-
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
13-
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], undef
14-
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> undef, <4 x i32> [[SHUFFLE1]]
15-
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]]
16-
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP27]], align 8
8+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 8
9+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
10+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
11+
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -1, i32 -1>
12+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
13+
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP1]], undef
14+
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> undef, <4 x i32> [[TMP4]]
15+
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP6]]
16+
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP27]], align 8
1717
; CHECK-NEXT: ret void
1818
;
1919
bb:
@@ -58,12 +58,11 @@ define internal i32 @ipvideo_decode_block_opcode_0xD_16() {
5858
; CHECK-NEXT: entry:
5959
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
6060
; CHECK: for.body:
61-
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[SHRINK_SHUFFLE:%.*]], [[IF_END:%.*]] ]
62-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
61+
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[TMP0]], [[IF_END:%.*]] ]
62+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
6363
; CHECK-NEXT: br label [[IF_END]]
6464
; CHECK: if.end:
65-
; CHECK-NEXT: store <8 x i16> [[SHUFFLE]], ptr undef, align 2
66-
; CHECK-NEXT: [[SHRINK_SHUFFLE]] = shufflevector <8 x i16> [[SHUFFLE]], <8 x i16> poison, <2 x i32> <i32 0, i32 4>
65+
; CHECK-NEXT: store <8 x i16> [[TMP1]], ptr undef, align 2
6766
; CHECK-NEXT: br label [[FOR_BODY]]
6867
;
6968
entry:

llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,22 @@ define void @foo(ptr %this, ptr %p, i32 %add7) {
99
; CHECK-NEXT: entry:
1010
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 undef>, i32 [[ADD7:%.*]], i32 0
1111
; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[TMP0]], <i32 2, i32 2>
12-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
12+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
1313
; CHECK-NEXT: switch i32 undef, label [[SW_EPILOG:%.*]] [
1414
; CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
1515
; CHECK-NEXT: i32 2, label [[SW_BB]]
1616
; CHECK-NEXT: ]
1717
; CHECK: sw.bb:
18-
; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> <i32 2, i32 0>
19-
; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[SHRINK_SHUFFLE]], <i32 -1, i32 -1>
18+
; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], <i32 -1, i32 -1>
2019
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[THIS:%.*]], align 4
21-
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP4]], [[TMP2]]
20+
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP4]], [[TMP3]]
2221
; CHECK-NEXT: br label [[SW_EPILOG]]
2322
; CHECK: sw.epilog:
2423
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ undef, [[ENTRY:%.*]] ], [ [[TMP5]], [[SW_BB]] ]
25-
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
26-
; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> undef, [[SHUFFLE]]
27-
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SHUFFLE1]]
28-
; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[P:%.*]], align 4
24+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
25+
; CHECK-NEXT: [[TMP8:%.*]] = sub <4 x i32> undef, [[TMP2]]
26+
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP7]]
27+
; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[P:%.*]], align 4
2928
; CHECK-NEXT: ret void
3029
;
3130
entry:

0 commit comments

Comments
 (0)