Skip to content

Commit 42ddb55

Browse files
authored
[X86] combineINSERT_SUBVECTOR - peek through bitcasts to find a concatenation of subvector shuffles (#131331)
Extend the existing concat(shuffle(), shuffle(), ...) shuffle combining so that it also handles subvector operands hidden behind bitcasts, such as concat(shuffle(), bitcast(shuffle()), ...).
1 parent eeb2733 commit 42ddb55

8 files changed

7566 additions (+) and 8112 deletions (-) across the changed lines

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58818,7 +58818,7 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5881858818

5881958819
// Attempt to recursively combine to a shuffle.
5882058820
if (all_of(SubVectorOps, [](SDValue SubOp) {
58821-
return isTargetShuffle(SubOp.getOpcode());
58821+
return isTargetShuffle(peekThroughBitcasts(SubOp).getOpcode());
5882258822
})) {
5882358823
SDValue Op(N, 0);
5882458824
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))

llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll

Lines changed: 1630 additions & 1686 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-6.ll

Lines changed: 995 additions & 1053 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll

Lines changed: 592 additions & 630 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll

Lines changed: 1364 additions & 1464 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll

Lines changed: 2750 additions & 2770 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll

Lines changed: 232 additions & 504 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -168,8 +168,7 @@ define <64 x i8> @shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_
168168
; AVX512F-NEXT: vpshufb %ymm1, %ymm0, %ymm2
169169
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
170170
; AVX512F-NEXT: vpshufb %ymm1, %ymm0, %ymm0
171-
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
172-
; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
171+
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm2[2,3,0,1]
173172
; AVX512F-NEXT: retq
174173
;
175174
; AVX512BW-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
@@ -185,8 +184,7 @@ define <64 x i8> @shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_
185184
; AVX512DQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2
186185
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
187186
; AVX512DQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0
188-
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
189-
; AVX512DQ-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
187+
; AVX512DQ-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm2[2,3,0,1]
190188
; AVX512DQ-NEXT: retq
191189
;
192190
; AVX512VBMI-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:

0 commit comments

Comments
 (0)