Skip to content

Commit 74f69c4

Browse files
authored
[X86] SimplifyDemandedVectorEltsForTargetNode - reduce the size of VPERMV v16f32/v16i32 nodes if the upper elements are not demanded (#134890)
Follow-up to #133923, which missed this case: even without AVX512VL, we can replace VPERMV v16f32/v16i32 nodes with the AVX2 v8f32/v8i32 equivalents when the upper elements are not demanded.
1 parent 53fa92d commit 74f69c4

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43810,7 +43810,9 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
43810 43810
case X86ISD::VPERMV: {
43811 43811
SmallVector<int, 16> Mask;
43812 43812
SmallVector<SDValue, 2> Ops;
43813-
if ((VT.is256BitVector() || Subtarget.hasVLX()) &&
43813+
// We can always split v16i32/v16f32 AVX512 to v8i32/v8f32 AVX2 variants.
43814+
if ((VT.is256BitVector() || Subtarget.hasVLX() || VT == MVT::v16i32 ||
43815+
VT == MVT::v16f32) &&
43814 43816
getTargetShuffleMask(Op, /*AllowSentinelZero=*/false, Ops, Mask)) {
43815 43817
// For lane-crossing shuffles, only split in half in case we're still
43816 43818
// referencing higher elements.

llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -491,8 +491,8 @@ define <4 x float> @test_v16f32_0_1_3_6 (<16 x float> %v) {
491 491
; ALL-LABEL: test_v16f32_0_1_3_6:
492 492
; ALL: # %bb.0:
493 493
; ALL-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,1,3,6]
494-
; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
495-
; ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
494+
; ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
495+
; ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
496 496
; ALL-NEXT: vzeroupper
497 497
; ALL-NEXT: retq
498 498
%res = shufflevector <16 x float> %v, <16 x float> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 6>

0 commit comments

Comments
 (0)