Skip to content

Commit 9b94056

Browse files
authored
[X86] Relax VPERMV3 to VPERMV combine for more types (#97206)
This is a follow-up to #96414.
1 parent e19ac0d commit 9b94056

File tree

2 files changed

+27
-5
lines changed

2 files changed

+27
-5
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41334,15 +41334,13 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4133441334
return SDValue();
4133541335
}
4133641336
case X86ISD::VPERMV3: {
41337-
// VPERM[I,T]2[B,W] are 3 uops on Skylake and Icelake so we try to use
41338-
// VPERMV.
41337+
// Combine VPERMV3 to widened VPERMV if the two source operands are split
41338+
// from the same vector.
4133941339
SDValue V1 = peekThroughBitcasts(N.getOperand(0));
4134041340
SDValue V2 = peekThroughBitcasts(N.getOperand(2));
4134141341
MVT SVT = V1.getSimpleValueType();
41342-
MVT EVT = VT.getVectorElementType();
4134341342
MVT NVT = VT.getDoubleNumVectorElementsVT();
41344-
if ((EVT == MVT::i8 || EVT == MVT::i16) &&
41345-
(NVT.is256BitVector() ||
41343+
if ((NVT.is256BitVector() ||
4134641344
(NVT.is512BitVector() && Subtarget.hasEVEX512())) &&
4134741345
V1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4134841346
V1.getConstantOperandVal(1) == 0 &&

llvm/test/CodeGen/X86/avx512vl-intrinsics.ll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7008,6 +7008,30 @@ define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double>
70087008
ret <4 x double> %1
70097009
}
70107010

7011+
define <8 x i32> @combine_vpermi2d_vpermps(<16 x i32> noundef %a) {
7012+
; X86-LABEL: combine_vpermi2d_vpermps:
7013+
; X86: # %bb.0:
7014+
; X86-NEXT: vmovaps {{.*#+}} ymm1 = [14,13,6,3,5,15,0,1]
7015+
; X86-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x0d,A,A,A,A]
7016+
; X86-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
7017+
; X86-NEXT: vpermps %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x16,0xc0]
7018+
; X86-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
7019+
; X86-NEXT: retl # encoding: [0xc3]
7020+
;
7021+
; X64-LABEL: combine_vpermi2d_vpermps:
7022+
; X64: # %bb.0:
7023+
; X64-NEXT: vmovaps {{.*#+}} ymm1 = [14,13,6,3,5,15,0,1]
7024+
; X64-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x0d,A,A,A,A]
7025+
; X64-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
7026+
; X64-NEXT: vpermps %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x16,0xc0]
7027+
; X64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
7028+
; X64-NEXT: retq # encoding: [0xc3]
7029+
%1 = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7030+
%2 = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7031+
%3 = tail call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %1, <8 x i32> <i32 14, i32 13, i32 6, i32 3, i32 5, i32 15, i32 0, i32 1>, <8 x i32> %2)
7032+
ret <8 x i32> %3
7033+
}
7034+
70117035
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
70127036
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
70137037
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)

0 commit comments

Comments
 (0)