@@ -369,14 +369,12 @@ define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
369
369
; AVX512BW: # %bb.0:
370
370
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
371
371
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
372
+ ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
372
373
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
373
374
; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
374
- ; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
375
- ; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm2
376
- ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k0
377
- ; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
378
- ; AVX512BW-NEXT: vpandq %zmm1, %zmm2, %zmm1
379
- ; AVX512BW-NEXT: vpaddb %zmm3, %zmm1, %zmm1
375
+ ; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm3 {%k1} {z}
376
+ ; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm1
377
+ ; AVX512BW-NEXT: vpaddb %zmm1, %zmm3, %zmm1
380
378
; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
381
379
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
382
380
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
@@ -455,14 +453,12 @@ define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
455
453
; AVX512BW: # %bb.0:
456
454
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
457
455
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
456
+ ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
458
457
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
459
458
; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
460
- ; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
461
- ; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm2
462
- ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k0
463
- ; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
464
- ; AVX512BW-NEXT: vpandq %zmm1, %zmm2, %zmm1
465
- ; AVX512BW-NEXT: vpaddb %zmm3, %zmm1, %zmm1
459
+ ; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm3 {%k1} {z}
460
+ ; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm1
461
+ ; AVX512BW-NEXT: vpaddb %zmm1, %zmm3, %zmm1
466
462
; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
467
463
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
468
464
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
@@ -561,14 +557,12 @@ define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
561
557
; AVX512BW: # %bb.0:
562
558
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
563
559
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
560
+ ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
564
561
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
565
562
; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
566
- ; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
567
- ; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0
568
- ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k0
569
- ; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
570
- ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
571
- ; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
563
+ ; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0 {%k1} {z}
564
+ ; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm1
565
+ ; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
572
566
; AVX512BW-NEXT: retq
573
567
;
574
568
; AVX512DQ-LABEL: testv64i8:
@@ -651,14 +645,12 @@ define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
651
645
; AVX512BW: # %bb.0:
652
646
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
653
647
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
648
+ ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
654
649
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
655
650
; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
656
- ; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
657
- ; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0
658
- ; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k0
659
- ; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
660
- ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
661
- ; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
651
+ ; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0 {%k1} {z}
652
+ ; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm1
653
+ ; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
662
654
; AVX512BW-NEXT: retq
663
655
;
664
656
; AVX512DQ-LABEL: testv64i8u:
0 commit comments