Skip to content

Commit da286c8

Browse files
authored
[VectorCombine] foldShuffleToIdentity - peek through bitcasts to see if they come from the same value to form identity sequence (#98334)
Workaround until I can get #96884 fixed properly - when trying to find identity sequences, peek through any bitcasts to see if the values all came from the same source. We don't run CSE frequently enough to merge all the bitcasts that we end up with.
1 parent 2da30f8 commit da286c8

File tree

3 files changed

+26
-70
lines changed

3 files changed

+26
-70
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1859,13 +1859,19 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
18591859
if (!FrontU)
18601860
return false;
18611861

1862+
// Helper to peek through bitcasts to the same value.
1863+
auto IsEquiv = [&](Value *X, Value *Y) {
1864+
return X->getType() == Y->getType() &&
1865+
peekThroughBitcasts(X) == peekThroughBitcasts(Y);
1866+
};
1867+
18621868
// Look for an identity value.
18631869
if (FrontLane == 0 &&
18641870
cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
18651871
Ty->getNumElements() &&
1866-
all_of(drop_begin(enumerate(Item)), [Item](const auto &E) {
1872+
all_of(drop_begin(enumerate(Item)), [IsEquiv, Item](const auto &E) {
18671873
Value *FrontV = Item.front().first->get();
1868-
return !E.value().first || (E.value().first->get() == FrontV &&
1874+
return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
18691875
E.value().second == (int)E.index());
18701876
})) {
18711877
IdentityLeafs.insert(FrontU);

llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll

Lines changed: 16 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -88,20 +88,10 @@ define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64>
8888
; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32>
8989
; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32>
9090
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
91-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
92-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
93-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
94-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
95-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
96-
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP5]], <4 x i32> [[TMP3]]
97-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
98-
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
99-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
100-
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
101-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
102-
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP11]], <4 x i32> [[TMP9]]
103-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
104-
; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP13]] to <4 x i64>
91+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
92+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
93+
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]]
94+
; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64>
10595
; CHECK-NEXT: ret <4 x i64> [[RES]]
10696
;
10797
%a.bc = bitcast <4 x i64> %a to <32 x i8>
@@ -129,20 +119,10 @@ define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64
129119
; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16>
130120
; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16>
131121
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
132-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
133-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
134-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
135-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
136-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
137-
; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP5]], <8 x i16> [[TMP3]]
138-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
139-
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[A]] to <16 x i16>
140-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP8]], <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
141-
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i64> [[B]] to <16 x i16>
142-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
143-
; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i16> [[TMP11]], <8 x i16> [[TMP9]]
144-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[TMP6]], <8 x i16> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
145-
; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP13]] to <4 x i64>
122+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
123+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
124+
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]]
125+
; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64>
146126
; CHECK-NEXT: ret <4 x i64> [[RES]]
147127
;
148128
%a.bc = bitcast <4 x i64> %a to <32 x i8>
@@ -276,20 +256,10 @@ define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64
276256
; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
277257
; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
278258
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
279-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
280-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
281-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
282-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
283-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
284-
; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP5]], <8 x i32> [[TMP3]]
285-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
286-
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[A]] to <16 x i32>
287-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
288-
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[B]] to <16 x i32>
289-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
290-
; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP11]], <8 x i32> [[TMP9]]
291-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
292-
; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP13]] to <8 x i64>
259+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
260+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
261+
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]]
262+
; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64>
293263
; CHECK-NEXT: ret <8 x i64> [[RES]]
294264
;
295265
%a.bc = bitcast <8 x i64> %a to <64 x i8>
@@ -317,20 +287,10 @@ define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i6
317287
; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16>
318288
; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16>
319289
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
320-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
321-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
322-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
323-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
324-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
325-
; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP5]], <16 x i16> [[TMP3]]
326-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
327-
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[A]] to <32 x i16>
328-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i16> [[TMP8]], <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
329-
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[B]] to <32 x i16>
330-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <32 x i16> [[TMP10]], <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
331-
; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i16> [[TMP11]], <16 x i16> [[TMP9]]
332-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i16> [[TMP6]], <16 x i16> [[TMP12]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
333-
; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP13]] to <8 x i64>
290+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
291+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
292+
; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]]
293+
; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64>
334294
; CHECK-NEXT: ret <8 x i64> [[RES]]
335295
;
336296
%a.bc = bitcast <8 x i64> %a to <64 x i8>

llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -993,25 +993,15 @@ define void @maximal_legal_fpmath(ptr %addr1, ptr %addr2, ptr %result, float %va
993993
ret void
994994
}
995995

996-
; TODO: Peek through (repeated) bitcasts to find a common source value.
996+
; Peek through (repeated) bitcasts to find a common source value.
997997
define <4 x i64> @bitcast_smax_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b) {
998998
; CHECK-LABEL: @bitcast_smax_v8i32_v4i32(
999999
; CHECK-NEXT: [[A_BC0:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
10001000
; CHECK-NEXT: [[B_BC0:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
10011001
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[A_BC0]], [[B_BC0]]
1002-
; CHECK-NEXT: [[CMP_LO:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1003-
; CHECK-NEXT: [[CMP_HI:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
10041002
; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
10051003
; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
1006-
; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <8 x i32> [[A_BC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1007-
; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <8 x i32> [[B_BC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1008-
; CHECK-NEXT: [[LO:%.*]] = select <4 x i1> [[CMP_LO]], <4 x i32> [[B_LO]], <4 x i32> [[A_LO]]
1009-
; CHECK-NEXT: [[A_BC2:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
1010-
; CHECK-NEXT: [[B_BC2:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
1011-
; CHECK-NEXT: [[A_HI:%.*]] = shufflevector <8 x i32> [[A_BC2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1012-
; CHECK-NEXT: [[B_HI:%.*]] = shufflevector <8 x i32> [[B_BC2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1013-
; CHECK-NEXT: [[HI:%.*]] = select <4 x i1> [[CMP_HI]], <4 x i32> [[B_HI]], <4 x i32> [[A_HI]]
1014-
; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <4 x i32> [[LO]], <4 x i32> [[HI]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1004+
; CHECK-NEXT: [[CONCAT:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[B_BC1]], <8 x i32> [[A_BC1]]
10151005
; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[CONCAT]] to <4 x i64>
10161006
; CHECK-NEXT: ret <4 x i64> [[RES]]
10171007
;

0 commit comments

Comments
 (0)