@@ -1211,3 +1211,59 @@ define <3 x i3> @test_compress_narrow_illegal_element_type(<3 x i3> %vec, <3 x i
1211
1211
%out = call <3 x i3 > @llvm.experimental.vector.compress (<3 x i3 > %vec , <3 x i1 > %mask , <3 x i3 > undef )
1212
1212
ret <3 x i3 > %out
1213
1213
}
1214
+
1215
; Test: llvm.experimental.vector.compress on <4 x i32> with a <4 x i1> mask
; and an explicit all-zero passthru operand (zeroinitializer).
;
; What the expected output below shows for each check prefix:
;  - AVX2 prefix: no compress instruction is used. A 16-byte stack slot is
;    zeroed first (vxorps + vmovaps), then each element of %vec is stored to
;    the slot at an index built by accumulating the low bit of each mask lane
;    (andl $1 / addq). The final store selects between element 3 and zero
;    with cmov, and the result is reloaded from the slot.
;  - AVX512F prefix: the operands are treated as zmm registers, the mask
;    register is shifted left then right by 12 (kshiftlw / kshiftrw), and a
;    single zero-masking vpcompressd ({z}) on zmm0 produces the result.
;  - AVX512VL prefix: a single zero-masking vpcompressd ({z}) on xmm0 with
;    the mask taken from vptestmd.
;
; NOTE(review): the CHECK lines look autogenerated (presumably by
; utils/update_llc_test_checks.py -- confirm against the file header, which
; is not visible here); regenerate them rather than editing them by hand.
+ define <4 x i32 > @test_compress_v4i32_zero_passthru (<4 x i32 > %vec , <4 x i1 > %mask ) {
1216
+ ; AVX2-LABEL: test_compress_v4i32_zero_passthru:
1217
+ ; AVX2: # %bb.0:
1218
+ ; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
1219
+ ; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
1220
+ ; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
1221
+ ; AVX2-NEXT: vmovaps %xmm2, -{{[0-9]+}}(%rsp)
1222
+ ; AVX2-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
1223
+ ; AVX2-NEXT: vmovd %xmm1, %eax
1224
+ ; AVX2-NEXT: andl $1, %eax
1225
+ ; AVX2-NEXT: vextractps $1, %xmm0, -24(%rsp,%rax,4)
1226
+ ; AVX2-NEXT: vpextrd $1, %xmm1, %ecx
1227
+ ; AVX2-NEXT: andl $1, %ecx
1228
+ ; AVX2-NEXT: addq %rax, %rcx
1229
+ ; AVX2-NEXT: vextractps $2, %xmm0, -24(%rsp,%rcx,4)
1230
+ ; AVX2-NEXT: vpextrd $2, %xmm1, %eax
1231
+ ; AVX2-NEXT: andl $1, %eax
1232
+ ; AVX2-NEXT: addq %rcx, %rax
1233
+ ; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
1234
+ ; AVX2-NEXT: andl $1, %ecx
1235
+ ; AVX2-NEXT: addq %rax, %rcx
1236
+ ; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax
1237
+ ; AVX2-NEXT: andl $3, %eax
1238
+ ; AVX2-NEXT: vextractps $3, %xmm0, -24(%rsp,%rax,4)
1239
+ ; AVX2-NEXT: xorl %eax, %eax
1240
+ ; AVX2-NEXT: cmpq $3, %rcx
1241
+ ; AVX2-NEXT: movl $3, %edx
1242
+ ; AVX2-NEXT: cmovbq %rcx, %rdx
1243
+ ; AVX2-NEXT: vextractps $3, %xmm0, %ecx
1244
+ ; AVX2-NEXT: cmovbel %eax, %ecx
1245
+ ; AVX2-NEXT: movl %ecx, -24(%rsp,%rdx,4)
1246
+ ; AVX2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
1247
+ ; AVX2-NEXT: retq
1248
+ ;
1249
+ ; AVX512F-LABEL: test_compress_v4i32_zero_passthru:
1250
+ ; AVX512F: # %bb.0:
1251
+ ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1252
+ ; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1
1253
+ ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1254
+ ; AVX512F-NEXT: kshiftlw $12, %k0, %k0
1255
+ ; AVX512F-NEXT: kshiftrw $12, %k0, %k1
1256
+ ; AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
1257
+ ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1258
+ ; AVX512F-NEXT: vzeroupper
1259
+ ; AVX512F-NEXT: retq
1260
+ ;
1261
+ ; AVX512VL-LABEL: test_compress_v4i32_zero_passthru:
1262
+ ; AVX512VL: # %bb.0:
1263
+ ; AVX512VL-NEXT: vpslld $31, %xmm1, %xmm1
1264
+ ; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1
1265
+ ; AVX512VL-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z}
1266
+ ; AVX512VL-NEXT: retq
1267
; The third operand is the passthru vector; here it is the constant zero
; vector rather than undef (contrast with the preceding test in this file).
+ %out = call <4 x i32 > @llvm.experimental.vector.compress (<4 x i32 > %vec , <4 x i1 > %mask , <4 x i32 > zeroinitializer )
1268
+ ret <4 x i32 > %out
1269
+ }
0 commit comments