Skip to content

Commit 635c344

Browse files
authored
[X86] Add vector_compress patterns with a zero vector passthru. (#113970)
We can use the kz form to automatically zero the extra elements. Fixes #113263.
1 parent 1831109 commit 635c344

File tree

2 files changed

+59
-0
lines changed

2 files changed

+59
-0
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10549,6 +10549,9 @@ multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10549 10549
def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, undef)),
10550 10550
(!cast<Instruction>(Name#_.ZSuffix#rrkz)
10551 10551
_.KRCWM:$mask, _.RC:$src)>;
10552+
def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10553+
(!cast<Instruction>(Name#_.ZSuffix#rrkz)
10554+
_.KRCWM:$mask, _.RC:$src)>;
10552 10555
def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, _.RC:$passthru)),
10553 10556
(!cast<Instruction>(Name#_.ZSuffix#rrk)
10554 10557
_.RC:$passthru, _.KRCWM:$mask, _.RC:$src)>;

llvm/test/CodeGen/X86/vector-compress.ll

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1211,3 +1211,59 @@ define <3 x i3> @test_compress_narrow_illegal_element_type(<3 x i3> %vec, <3 x i
1211 1211
%out = call <3 x i3> @llvm.experimental.vector.compress(<3 x i3> %vec, <3 x i1> %mask, <3 x i3> undef)
1212 1212
ret <3 x i3> %out
1213 1213
}
1214+
1215+
define <4 x i32> @test_compress_v4i32_zero_passthru(<4 x i32> %vec, <4 x i1> %mask) {
1216+
; AVX2-LABEL: test_compress_v4i32_zero_passthru:
1217+
; AVX2: # %bb.0:
1218+
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
1219+
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
1220+
; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
1221+
; AVX2-NEXT: vmovaps %xmm2, -{{[0-9]+}}(%rsp)
1222+
; AVX2-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
1223+
; AVX2-NEXT: vmovd %xmm1, %eax
1224+
; AVX2-NEXT: andl $1, %eax
1225+
; AVX2-NEXT: vextractps $1, %xmm0, -24(%rsp,%rax,4)
1226+
; AVX2-NEXT: vpextrd $1, %xmm1, %ecx
1227+
; AVX2-NEXT: andl $1, %ecx
1228+
; AVX2-NEXT: addq %rax, %rcx
1229+
; AVX2-NEXT: vextractps $2, %xmm0, -24(%rsp,%rcx,4)
1230+
; AVX2-NEXT: vpextrd $2, %xmm1, %eax
1231+
; AVX2-NEXT: andl $1, %eax
1232+
; AVX2-NEXT: addq %rcx, %rax
1233+
; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
1234+
; AVX2-NEXT: andl $1, %ecx
1235+
; AVX2-NEXT: addq %rax, %rcx
1236+
; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
1237+
; AVX2-NEXT: andl $3, %eax
1238+
; AVX2-NEXT: vextractps $3, %xmm0, -24(%rsp,%rax,4)
1239+
; AVX2-NEXT: xorl %eax, %eax
1240+
; AVX2-NEXT: cmpq $3, %rcx
1241+
; AVX2-NEXT: movl $3, %edx
1242+
; AVX2-NEXT: cmovbq %rcx, %rdx
1243+
; AVX2-NEXT: vextractps $3, %xmm0, %ecx
1244+
; AVX2-NEXT: cmovbel %eax, %ecx
1245+
; AVX2-NEXT: movl %ecx, -24(%rsp,%rdx,4)
1246+
; AVX2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
1247+
; AVX2-NEXT: retq
1248+
;
1249+
; AVX512F-LABEL: test_compress_v4i32_zero_passthru:
1250+
; AVX512F: # %bb.0:
1251+
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1252+
; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1
1253+
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1254+
; AVX512F-NEXT: kshiftlw $12, %k0, %k0
1255+
; AVX512F-NEXT: kshiftrw $12, %k0, %k1
1256+
; AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
1257+
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1258+
; AVX512F-NEXT: vzeroupper
1259+
; AVX512F-NEXT: retq
1260+
;
1261+
; AVX512VL-LABEL: test_compress_v4i32_zero_passthru:
1262+
; AVX512VL: # %bb.0:
1263+
; AVX512VL-NEXT: vpslld $31, %xmm1, %xmm1
1264+
; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1
1265+
; AVX512VL-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z}
1266+
; AVX512VL-NEXT: retq
1267+
%out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> zeroinitializer)
1268+
ret <4 x i32> %out
1269+
}

0 commit comments

Comments
 (0)