Skip to content

Commit 8d78ad1

Browse files
RKSimon authored and nikic committed
[X86][AVX512] Only lower to VPALIGNR if we have BWI (PR48322)
1 parent dd2759e commit 8d78ad1

File tree

2 files changed

+31
-3
lines changed

2 files changed

+31
-3
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+5 -3
Original file line number | Diff line number | Diff line change
@@ -17270,12 +17270,14 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1727017270
return Rotate;
1727117271

1727217272
// Try to use PALIGNR.
17273-
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask,
17274-
Subtarget, DAG))
17275-
return Rotate;
17273+
if (Subtarget.hasBWI())
17274+
if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask,
17275+
Subtarget, DAG))
17276+
return Rotate;
1727617277

1727717278
if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
1727817279
return Unpck;
17280+
1727917281
// If we have AVX512F support, we can use VEXPAND.
1728017282
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, V2,
1728117283
DAG, Subtarget))

llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll

+26
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,16 @@ define <16 x float> @shuffle_v16f32_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14
167167
ret <16 x float> %tmp2
168168
}
169169

170+
; PR48322
171+
define <16 x float> @shuffle_v16f32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29(<16 x float> %a, <16 x float> %b) {
172+
; ALL-LABEL: shuffle_v16f32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29:
173+
; ALL: # %bb.0:
174+
; ALL-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1],zmm1[0],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[7],zmm1[6]
175+
; ALL-NEXT: retq
176+
%shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 20, i32 21, i32 10, i32 11, i32 24, i32 25, i32 14, i32 15, i32 28, i32 29>
177+
ret <16 x float> %shuffle
178+
}
179+
170180
define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i32> %a, <16 x i32> %b) {
171181
; ALL-LABEL: shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
172182
; ALL: # %bb.0:
@@ -506,6 +516,22 @@ define <16 x i32> @shuffle_v16i32_16_16_02_03_20_20_06_07_24_24_10_11_28_28_uu_u
506516
ret <16 x i32> %shuffle
507517
}
508518

519+
; PR48322
520+
define <16 x i32> @shuffle_v16i32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29(<16 x i32> %a, <16 x i32> %b) {
521+
; AVX512F-LABEL: shuffle_v16i32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29:
522+
; AVX512F: # %bb.0:
523+
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,3,10,5,12,7,14]
524+
; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
525+
; AVX512F-NEXT: retq
526+
;
527+
; AVX512BW-LABEL: shuffle_v16i32_02_03_16_17_06_07_20_21_10_11_24_25_14_15_28_29:
528+
; AVX512BW: # %bb.0:
529+
; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,4,5,6,7],zmm0[24,25,26,27,28,29,30,31],zmm1[16,17,18,19,20,21,22,23],zmm0[40,41,42,43,44,45,46,47],zmm1[32,33,34,35,36,37,38,39],zmm0[56,57,58,59,60,61,62,63],zmm1[48,49,50,51,52,53,54,55]
530+
; AVX512BW-NEXT: retq
531+
%shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 2, i32 3, i32 16, i32 17, i32 6, i32 7, i32 20, i32 21, i32 10, i32 11, i32 24, i32 25, i32 14, i32 15, i32 28, i32 29>
532+
ret <16 x i32> %shuffle
533+
}
534+
509535
define <16 x i32> @shuffle_v8i32_17_16_01_00_21_20_05_04_25_24_09_08_29_28_13_12(<16 x i32> %a, <16 x i32> %b) {
510536
; ALL-LABEL: shuffle_v8i32_17_16_01_00_21_20_05_04_25_24_09_08_29_28_13_12:
511537
; ALL: # %bb.0:

0 commit comments

Comments
 (0)