Skip to content

Commit 94795a3

Browse files
committed
[VectorCombine] foldBitcastShuf - add support for length changing shuffles
Allow length-changing shuffle masks in the "bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'" fold. It also exposes some poor shuffle mask detection for extract/insert subvector cases inside improveShuffleKindFromMask. First stage towards addressing Issue llvm#67803.
1 parent 3bae69e commit 94795a3

File tree

3 files changed

+69
-31
lines changed

3 files changed

+69
-31
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

+13-5
Original file line numberDiff line numberDiff line change
@@ -689,15 +689,18 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
689689
// 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
690690
// scalable type is unknown; Second, we cannot reason if the narrowed shuffle
691691
// mask for scalable type is a splat or not.
692-
// 2) Disallow non-vector casts and length-changing shuffles.
692+
// 2) Disallow non-vector casts.
693693
// TODO: We could allow any shuffle.
694+
auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
694695
auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
695-
if (!SrcTy || I.getOperand(0)->getType() != SrcTy)
696+
if (!DestTy || !SrcTy)
696697
return false;
697698

698-
auto *DestTy = cast<FixedVectorType>(I.getType());
699699
unsigned DestEltSize = DestTy->getScalarSizeInBits();
700700
unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
701+
if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
702+
return false;
703+
701704
SmallVector<int, 16> NewMask;
702705
if (DestEltSize <= SrcEltSize) {
703706
// The bitcast is from wide to narrow/equal elements. The shuffle mask can
@@ -714,18 +717,23 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
714717
return false;
715718
}
716719

720+
// Bitcast the shuffle src - keep its original width but use the destination
721+
// scalar type.
722+
unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
723+
auto *ShuffleTy = FixedVectorType::get(DestTy->getScalarType(), NumSrcElts);
724+
717725
// The new shuffle must not cost more than the old shuffle. The bitcast is
718726
// moved ahead of the shuffle, so assume that it has the same cost as before.
719727
InstructionCost DestCost = TTI.getShuffleCost(
720-
TargetTransformInfo::SK_PermuteSingleSrc, DestTy, NewMask);
728+
TargetTransformInfo::SK_PermuteSingleSrc, ShuffleTy, NewMask);
721729
InstructionCost SrcCost =
722730
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy, Mask);
723731
if (DestCost > SrcCost || !DestCost.isValid())
724732
return false;
725733

726734
// bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
727735
++NumShufOfBitcast;
728-
Value *CastV = Builder.CreateBitCast(V, DestTy);
736+
Value *CastV = Builder.CreateBitCast(V, ShuffleTy);
729737
Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
730738
replaceValue(I, *Shuf);
731739
return true;

llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll

+28-13
Original file line numberDiff line numberDiff line change
@@ -33,35 +33,50 @@ define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) {
3333
ret <4 x float> %r
3434
}
3535

36-
; TODO - length-changing shuffle
36+
; Length-changing shuffles
3737

3838
define <16 x i8> @bitcast_shuf_narrow_element_subvector(<2 x i32> %v) {
39-
; CHECK-LABEL: @bitcast_shuf_narrow_element_subvector(
40-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
41-
; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
42-
; CHECK-NEXT: ret <16 x i8> [[R]]
39+
; SSE-LABEL: @bitcast_shuf_narrow_element_subvector(
40+
; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
41+
; SSE-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
42+
; SSE-NEXT: ret <16 x i8> [[R]]
43+
;
44+
; AVX-LABEL: @bitcast_shuf_narrow_element_subvector(
45+
; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
46+
; AVX-NEXT: [[R:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
47+
; AVX-NEXT: ret <16 x i8> [[R]]
4348
;
4449
%shuf = shufflevector <2 x i32> %v, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
4550
%r = bitcast <4 x i32> %shuf to <16 x i8>
4651
ret <16 x i8> %r
4752
}
4853

4954
define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors(<2 x i64> %v) {
50-
; CHECK-LABEL: @bitcast_shuf_narrow_element_concat_subvectors(
51-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[V:%.*]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
52-
; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <16 x i16>
53-
; CHECK-NEXT: ret <16 x i16> [[R]]
55+
; SSE-LABEL: @bitcast_shuf_narrow_element_concat_subvectors(
56+
; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[V:%.*]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
57+
; SSE-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <16 x i16>
58+
; SSE-NEXT: ret <16 x i16> [[R]]
59+
;
60+
; AVX-LABEL: @bitcast_shuf_narrow_element_concat_subvectors(
61+
; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16>
62+
; AVX-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
63+
; AVX-NEXT: ret <16 x i16> [[R]]
5464
;
5565
%shuf = shufflevector <2 x i64> %v, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
5666
%r = bitcast <4 x i64> %shuf to <16 x i16>
5767
ret <16 x i16> %r
5868
}
5969

6070
define <16 x i8> @bitcast_shuf_extract_subvector(<8 x i32> %v) {
61-
; CHECK-LABEL: @bitcast_shuf_extract_subvector(
62-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
63-
; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
64-
; CHECK-NEXT: ret <16 x i8> [[R]]
71+
; SSE-LABEL: @bitcast_shuf_extract_subvector(
72+
; SSE-NEXT: [[TMP1:%.*]] = bitcast <8 x i32> [[V:%.*]] to <32 x i8>
73+
; SSE-NEXT: [[R:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
74+
; SSE-NEXT: ret <16 x i8> [[R]]
75+
;
76+
; AVX-LABEL: @bitcast_shuf_extract_subvector(
77+
; AVX-NEXT: [[SHUF:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
78+
; AVX-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
79+
; AVX-NEXT: ret <16 x i8> [[R]]
6580
;
6681
%shuf = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6782
%r = bitcast <4 x i32> %shuf to <16 x i8>

llvm/test/Transforms/VectorCombine/X86/shuffle.ll

+28-13
Original file line numberDiff line numberDiff line change
@@ -33,35 +33,50 @@ define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) {
3333
ret <4 x float> %r
3434
}
3535

36-
; TODO - Length-changing shuffle
36+
; Length-changing shuffles
3737

3838
define <16 x i8> @bitcast_shuf_narrow_element_subvector(<2 x i32> %v) {
39-
; CHECK-LABEL: @bitcast_shuf_narrow_element_subvector(
40-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
41-
; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
42-
; CHECK-NEXT: ret <16 x i8> [[R]]
39+
; SSE-LABEL: @bitcast_shuf_narrow_element_subvector(
40+
; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
41+
; SSE-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
42+
; SSE-NEXT: ret <16 x i8> [[R]]
43+
;
44+
; AVX-LABEL: @bitcast_shuf_narrow_element_subvector(
45+
; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
46+
; AVX-NEXT: [[R:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
47+
; AVX-NEXT: ret <16 x i8> [[R]]
4348
;
4449
%shuf = shufflevector <2 x i32> %v, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
4550
%r = bitcast <4 x i32> %shuf to <16 x i8>
4651
ret <16 x i8> %r
4752
}
4853

4954
define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors(<2 x i64> %v) {
50-
; CHECK-LABEL: @bitcast_shuf_narrow_element_concat_subvectors(
51-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[V:%.*]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
52-
; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <16 x i16>
53-
; CHECK-NEXT: ret <16 x i16> [[R]]
55+
; SSE-LABEL: @bitcast_shuf_narrow_element_concat_subvectors(
56+
; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[V:%.*]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
57+
; SSE-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <16 x i16>
58+
; SSE-NEXT: ret <16 x i16> [[R]]
59+
;
60+
; AVX-LABEL: @bitcast_shuf_narrow_element_concat_subvectors(
61+
; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16>
62+
; AVX-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
63+
; AVX-NEXT: ret <16 x i16> [[R]]
5464
;
5565
%shuf = shufflevector <2 x i64> %v, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
5666
%r = bitcast <4 x i64> %shuf to <16 x i16>
5767
ret <16 x i16> %r
5868
}
5969

6070
define <16 x i8> @bitcast_shuf_extract_subvector(<8 x i32> %v) {
61-
; CHECK-LABEL: @bitcast_shuf_extract_subvector(
62-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
63-
; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
64-
; CHECK-NEXT: ret <16 x i8> [[R]]
71+
; SSE-LABEL: @bitcast_shuf_extract_subvector(
72+
; SSE-NEXT: [[TMP1:%.*]] = bitcast <8 x i32> [[V:%.*]] to <32 x i8>
73+
; SSE-NEXT: [[R:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
74+
; SSE-NEXT: ret <16 x i8> [[R]]
75+
;
76+
; AVX-LABEL: @bitcast_shuf_extract_subvector(
77+
; AVX-NEXT: [[SHUF:%.*]] = shufflevector <8 x i32> [[V:%.*]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
78+
; AVX-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
79+
; AVX-NEXT: ret <16 x i8> [[R]]
6580
;
6681
%shuf = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6782
%r = bitcast <4 x i32> %shuf to <16 x i8>

0 commit comments

Comments
 (0)