
Commit bb91b43

[X86] Handle repeated blend mask in combineConcatVectorOps (llvm#82155)
llvm@1d27669e8ad07f8f2 added support for folding 512-bit concat(blendi(x,y,c0),blendi(z,w,c1)) into an AVX512BW mask select. But when the subvector type is v16i16, the 8-bit blend immediate only describes one 128-bit lane, so we need to generate a repeated mask to make the result correct. The subnode looks like t87: v16i16 = X86ISD::BLENDI t132, t58, TargetConstant:i8<-86>.
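For reference, here is a minimal scalar model of the two immediate semantics involved (illustrative only, not LLVM code; the function names are made up for the sketch). A v16i16 X86ISD::BLENDI lowers to VPBLENDW, whose 8-bit immediate is applied to each 128-bit lane, so bit i selects elements i and i+8 together; the AVX512BW mask select that the concat is folded into uses one mask bit per element, which is why the immediate has to be repeated before the two halves are glued together.

#include <array>
#include <cassert>
#include <cstdint>

using V16I16 = std::array<uint16_t, 16>;
using V32I16 = std::array<uint16_t, 32>;

// Scalar model of a v16i16 X86ISD::BLENDI: the 8-bit immediate repeats per
// 128-bit lane, i.e. bit (i % 8) picks between the two inputs for element i.
V16I16 blendi_v16i16(const V16I16 &X, const V16I16 &Y, uint8_t Imm) {
  V16I16 R{};
  for (int I = 0; I < 16; ++I)
    R[I] = ((Imm >> (I % 8)) & 1) ? Y[I] : X[I];
  return R;
}

// Scalar model of the v32i16 mask select (vmovdqu16 with a k-register):
// one mask bit per element, no per-lane repetition.
V32I16 select_v32i16(const V32I16 &X, const V32I16 &Y, uint32_t Mask) {
  V32I16 R{};
  for (int I = 0; I < 32; ++I)
    R[I] = ((Mask >> I) & 1) ? Y[I] : X[I];
  return R;
}

int main() {
  // TargetConstant:i8<-86> from the message is 0xAA: take the odd elements of
  // each 128-bit lane from the second operand.
  V16I16 X{}, Y{};
  for (int I = 0; I < 16; ++I)
    Y[I] = uint16_t(100 + I);
  V16I16 R = blendi_v16i16(X, Y, uint8_t(-86));
  assert(R[1] == 101 && R[9] == 109 && R[0] == 0 && R[8] == 0);
  return 0;
}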
1 parent 2646dcc commit bb91b43

2 files changed: +13 -8 lines


llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 0 deletions
@@ -55226,6 +55226,11 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
       if (NumOps == 2 && VT.is512BitVector() && Subtarget.useBWIRegs()) {
         uint64_t Mask0 = Ops[0].getConstantOperandVal(2);
         uint64_t Mask1 = Ops[1].getConstantOperandVal(2);
+        // MVT::v16i16 has repeated blend mask.
+        if (Op0.getSimpleValueType() == MVT::v16i16) {
+          Mask0 = (Mask0 << 8) | Mask0;
+          Mask1 = (Mask1 << 8) | Mask1;
+        }
         uint64_t Mask = (Mask1 << (VT.getVectorNumElements() / 2)) | Mask0;
         MVT MaskSVT = MVT::getIntegerVT(VT.getVectorNumElements());
         MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
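As a quick sanity check on the new test constants below (a standalone sketch; RepeatMask is just a stand-in for the widening step added in this hunk): with the per-subvector blend immediate 0x88, repeating it to 16 bits and concatenating the two halves gives 0x88888888, the updated movl immediate, whereas the pre-fix code produced the old 0x880088 value.

#include <cassert>
#include <cstdint>

int main() {
  // Stand-in for the widening step added above for v16i16 subvectors.
  auto RepeatMask = [](uint64_t M) { return (M << 8) | M; };

  uint64_t Mask0 = RepeatMask(0x88); // 0x8888
  uint64_t Mask1 = RepeatMask(0x88); // 0x8888

  // VT.getVectorNumElements() / 2 == 16 for the concatenated v32i16 result.
  uint64_t Mask = (Mask1 << 16) | Mask0;
  assert(Mask == 0x88888888);                   // new immediate in the tests
  assert(((0x88ull << 16) | 0x88) == 0x880088); // old, incorrect immediate

  // The 0x22 blends in the same tests follow the same pattern.
  assert(((RepeatMask(0x22) << 16) | RepeatMask(0x22)) == 0x22222222);
  return 0;
}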

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll

Lines changed: 8 additions & 8 deletions
@@ -2080,7 +2080,7 @@ define void @store_i8_stride8_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-NEXT: vpermq {{.*#+}} zmm6 = zmm3[0,2,0,2,4,6,4,6]
 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm7 = [u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15]
 ; AVX512BW-NEXT: vpshufb %zmm7, %zmm6, %zmm6
-; AVX512BW-NEXT: movl $8913032, %ecx # imm = 0x880088
+; AVX512BW-NEXT: movl $-2004318072, %ecx # imm = 0x88888888
 ; AVX512BW-NEXT: kmovd %ecx, %k1
 ; AVX512BW-NEXT: vmovdqu16 %zmm6, %zmm4 {%k1}
 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0

@@ -2091,7 +2091,7 @@ define void @store_i8_stride8_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-NEXT: vpermq {{.*#+}} zmm9 = zmm1[0,2,0,2,4,6,4,6]
 ; AVX512BW-NEXT: vpmovsxdq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm10
 ; AVX512BW-NEXT: vpshufb %zmm10, %zmm9, %zmm9
-; AVX512BW-NEXT: movl $2228258, %ecx # imm = 0x220022
+; AVX512BW-NEXT: movl $572662306, %ecx # imm = 0x22222222
 ; AVX512BW-NEXT: kmovd %ecx, %k2
 ; AVX512BW-NEXT: vmovdqu16 %zmm9, %zmm6 {%k2}
 ; AVX512BW-NEXT: movw $-21846, %cx # imm = 0xAAAA

@@ -2136,7 +2136,7 @@ define void @store_i8_stride8_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-FCP-NEXT: vpermq {{.*#+}} zmm8 = zmm2[0,2,0,2,4,6,4,6]
 ; AVX512BW-FCP-NEXT: vpmovsxwd {{.*#+}} zmm9 = [0,2048,0,2305,0,2562,0,2819,0,3076,0,3333,0,3590,0,3847]
 ; AVX512BW-FCP-NEXT: vpshufb %zmm9, %zmm8, %zmm8
-; AVX512BW-FCP-NEXT: movl $8913032, %ecx # imm = 0x880088
+; AVX512BW-FCP-NEXT: movl $-2004318072, %ecx # imm = 0x88888888
 ; AVX512BW-FCP-NEXT: kmovd %ecx, %k1
 ; AVX512BW-FCP-NEXT: vmovdqu16 %zmm3, %zmm8 {%k1}
 ; AVX512BW-FCP-NEXT: vpermt2q %zmm4, %zmm6, %zmm1

@@ -2146,7 +2146,7 @@ define void @store_i8_stride8_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512BW-FCP-NEXT: vpermq {{.*#+}} zmm6 = zmm0[0,2,0,2,4,6,4,6]
 ; AVX512BW-FCP-NEXT: vpmovsxwq {{.*#+}} zmm10 = [2048,2305,2562,2819,3076,3333,3590,3847]
 ; AVX512BW-FCP-NEXT: vpshufb %zmm10, %zmm6, %zmm6
-; AVX512BW-FCP-NEXT: movl $2228258, %ecx # imm = 0x220022
+; AVX512BW-FCP-NEXT: movl $572662306, %ecx # imm = 0x22222222
 ; AVX512BW-FCP-NEXT: kmovd %ecx, %k2
 ; AVX512BW-FCP-NEXT: vmovdqu16 %zmm1, %zmm6 {%k2}
 ; AVX512BW-FCP-NEXT: movw $-21846, %cx # imm = 0xAAAA

@@ -2193,7 +2193,7 @@ define void @store_i8_stride8_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-NEXT: vpermq {{.*#+}} zmm6 = zmm3[0,2,0,2,4,6,4,6]
 ; AVX512DQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm7 = [u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,2,10,u,u,u,u,u,u,3,11,u,u,u,u,u,u,4,12,u,u,u,u,u,u,5,13,u,u,u,u,u,u,6,14,u,u,u,u,u,u,7,15]
 ; AVX512DQ-BW-NEXT: vpshufb %zmm7, %zmm6, %zmm6
-; AVX512DQ-BW-NEXT: movl $8913032, %ecx # imm = 0x880088
+; AVX512DQ-BW-NEXT: movl $-2004318072, %ecx # imm = 0x88888888
 ; AVX512DQ-BW-NEXT: kmovd %ecx, %k1
 ; AVX512DQ-BW-NEXT: vmovdqu16 %zmm6, %zmm4 {%k1}
 ; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0

@@ -2204,7 +2204,7 @@ define void @store_i8_stride8_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-NEXT: vpermq {{.*#+}} zmm9 = zmm1[0,2,0,2,4,6,4,6]
 ; AVX512DQ-BW-NEXT: vpmovsxdq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm10
 ; AVX512DQ-BW-NEXT: vpshufb %zmm10, %zmm9, %zmm9
-; AVX512DQ-BW-NEXT: movl $2228258, %ecx # imm = 0x220022
+; AVX512DQ-BW-NEXT: movl $572662306, %ecx # imm = 0x22222222
 ; AVX512DQ-BW-NEXT: kmovd %ecx, %k2
 ; AVX512DQ-BW-NEXT: vmovdqu16 %zmm9, %zmm6 {%k2}
 ; AVX512DQ-BW-NEXT: movw $-21846, %cx # imm = 0xAAAA

@@ -2249,7 +2249,7 @@ define void @store_i8_stride8_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-FCP-NEXT: vpermq {{.*#+}} zmm8 = zmm2[0,2,0,2,4,6,4,6]
 ; AVX512DQ-BW-FCP-NEXT: vpmovsxwd {{.*#+}} zmm9 = [0,2048,0,2305,0,2562,0,2819,0,3076,0,3333,0,3590,0,3847]
 ; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm9, %zmm8, %zmm8
-; AVX512DQ-BW-FCP-NEXT: movl $8913032, %ecx # imm = 0x880088
+; AVX512DQ-BW-FCP-NEXT: movl $-2004318072, %ecx # imm = 0x88888888
 ; AVX512DQ-BW-FCP-NEXT: kmovd %ecx, %k1
 ; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm3, %zmm8 {%k1}
 ; AVX512DQ-BW-FCP-NEXT: vpermt2q %zmm4, %zmm6, %zmm1

@@ -2259,7 +2259,7 @@ define void @store_i8_stride8_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-BW-FCP-NEXT: vpermq {{.*#+}} zmm6 = zmm0[0,2,0,2,4,6,4,6]
 ; AVX512DQ-BW-FCP-NEXT: vpmovsxwq {{.*#+}} zmm10 = [2048,2305,2562,2819,3076,3333,3590,3847]
 ; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm10, %zmm6, %zmm6
-; AVX512DQ-BW-FCP-NEXT: movl $2228258, %ecx # imm = 0x220022
+; AVX512DQ-BW-FCP-NEXT: movl $572662306, %ecx # imm = 0x22222222
 ; AVX512DQ-BW-FCP-NEXT: kmovd %ecx, %k2
 ; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm1, %zmm6 {%k2}
 ; AVX512DQ-BW-FCP-NEXT: movw $-21846, %cx # imm = 0xAAAA
