@@ -386,36 +386,28 @@ define <32 x i1> @invert_i32_mask_extract_32(i32 %mask) {
386
386
define <32 x i1 > @i64_mask_extract_32 (i64 %mask ) {
387
387
; X64-AVX512-LABEL: i64_mask_extract_32:
388
388
; X64-AVX512: # %bb.0:
389
- ; X64-AVX512-NEXT: movq %rdi, %rax
390
- ; X64-AVX512-NEXT: kmovd %eax, %k0
391
- ; X64-AVX512-NEXT: movzbl %ah, %ecx
392
- ; X64-AVX512-NEXT: kmovd %ecx, %k1
393
- ; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k0
394
- ; X64-AVX512-NEXT: movl %eax, %ecx
395
- ; X64-AVX512-NEXT: shrl $24, %ecx
396
- ; X64-AVX512-NEXT: kmovd %ecx, %k1
397
- ; X64-AVX512-NEXT: shrl $16, %eax
398
- ; X64-AVX512-NEXT: movzbl %al, %eax
399
- ; X64-AVX512-NEXT: kmovd %eax, %k2
400
- ; X64-AVX512-NEXT: kunpckbw %k2, %k1, %k1
401
- ; X64-AVX512-NEXT: kunpckwd %k0, %k1, %k0
389
+ ; X64-AVX512-NEXT: kmovq %rdi, %k0
390
+ ; X64-AVX512-NEXT: kshiftrd $8, %k0, %k1
391
+ ; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k1
392
+ ; X64-AVX512-NEXT: kshiftrd $16, %k0, %k2
393
+ ; X64-AVX512-NEXT: kshiftrd $24, %k0, %k0
394
+ ; X64-AVX512-NEXT: kunpckbw %k2, %k0, %k0
395
+ ; X64-AVX512-NEXT: kunpckwd %k1, %k0, %k0
402
396
; X64-AVX512-NEXT: vpmovm2b %k0, %ymm0
403
397
; X64-AVX512-NEXT: retq
404
398
;
405
399
; X64-KNL-LABEL: i64_mask_extract_32:
406
400
; X64-KNL: # %bb.0:
407
- ; X64-KNL-NEXT: movq %rdi , %rax
408
- ; X64-KNL-NEXT: movl %eax , %ecx
401
+ ; X64-KNL-NEXT: movl %edi , %eax
402
+ ; X64-KNL-NEXT: shrl $16 , %eax
409
403
; X64-KNL-NEXT: kmovw %eax, %k0
410
- ; X64-KNL-NEXT: movzbl %ah, %edx
411
- ; X64-KNL-NEXT: # kill: def $eax killed $eax killed $rax
404
+ ; X64-KNL-NEXT: movl %edi, %eax
412
405
; X64-KNL-NEXT: shrl $24, %eax
413
406
; X64-KNL-NEXT: kmovw %eax, %k1
414
- ; X64-KNL-NEXT: shrl $16, %ecx
415
- ; X64-KNL-NEXT: movzbl %cl, %eax
416
- ; X64-KNL-NEXT: kmovw %eax, %k2
417
- ; X64-KNL-NEXT: kunpckbw %k2, %k1, %k1
418
- ; X64-KNL-NEXT: kmovw %edx, %k2
407
+ ; X64-KNL-NEXT: kunpckbw %k0, %k1, %k1
408
+ ; X64-KNL-NEXT: kmovw %edi, %k0
409
+ ; X64-KNL-NEXT: shrl $8, %edi
410
+ ; X64-KNL-NEXT: kmovw %edi, %k2
419
411
; X64-KNL-NEXT: kunpckbw %k0, %k2, %k2
420
412
; X64-KNL-NEXT: vpternlogd {{.*#+}} zmm0 {%k2} {z} = -1
421
413
; X64-KNL-NEXT: vpmovdb %zmm0, %xmm0
@@ -480,82 +472,56 @@ define <32 x i1> @invert_i64_mask_extract_32(i64 %mask) {
480
472
define <64 x i1 > @i64_mask_extract_64 (i64 %mask ) {
481
473
; X64-AVX512-LABEL: i64_mask_extract_64:
482
474
; X64-AVX512: # %bb.0:
483
- ; X64-AVX512-NEXT: movq %rdi, %rax
484
- ; X64-AVX512-NEXT: kmovd %eax, %k0
485
- ; X64-AVX512-NEXT: movzbl %ah, %ecx
486
- ; X64-AVX512-NEXT: kmovd %ecx, %k1
487
- ; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k0
488
- ; X64-AVX512-NEXT: movl %eax, %ecx
489
- ; X64-AVX512-NEXT: shrl $24, %ecx
490
- ; X64-AVX512-NEXT: kmovd %ecx, %k1
491
- ; X64-AVX512-NEXT: movl %eax, %ecx
492
- ; X64-AVX512-NEXT: shrl $16, %ecx
493
- ; X64-AVX512-NEXT: movzbl %cl, %ecx
494
- ; X64-AVX512-NEXT: kmovd %ecx, %k2
495
- ; X64-AVX512-NEXT: kunpckbw %k2, %k1, %k1
496
- ; X64-AVX512-NEXT: kunpckwd %k0, %k1, %k0
497
- ; X64-AVX512-NEXT: movq %rdi, %rcx
498
- ; X64-AVX512-NEXT: shrq $32, %rcx
499
- ; X64-AVX512-NEXT: movzbl %cl, %ecx
500
- ; X64-AVX512-NEXT: kmovd %ecx, %k1
501
- ; X64-AVX512-NEXT: movq %rdi, %rcx
502
- ; X64-AVX512-NEXT: shrq $40, %rcx
503
- ; X64-AVX512-NEXT: movzbl %cl, %ecx
504
- ; X64-AVX512-NEXT: kmovd %ecx, %k2
475
+ ; X64-AVX512-NEXT: kmovq %rdi, %k0
476
+ ; X64-AVX512-NEXT: kshiftrq $32, %k0, %k1
477
+ ; X64-AVX512-NEXT: kshiftrq $40, %k0, %k2
505
478
; X64-AVX512-NEXT: kunpckbw %k1, %k2, %k1
506
- ; X64-AVX512-NEXT: movq %rdi, %rcx
507
- ; X64-AVX512-NEXT: shrq $56, %rcx
508
- ; X64-AVX512-NEXT: kmovd %ecx, %k2
509
- ; X64-AVX512-NEXT: shrq $48, %rax
510
- ; X64-AVX512-NEXT: movzbl %al, %eax
511
- ; X64-AVX512-NEXT: kmovd %eax, %k3
512
- ; X64-AVX512-NEXT: kunpckbw %k3, %k2, %k2
479
+ ; X64-AVX512-NEXT: kshiftrq $48, %k0, %k2
480
+ ; X64-AVX512-NEXT: kshiftrq $56, %k0, %k3
481
+ ; X64-AVX512-NEXT: kunpckbw %k2, %k3, %k2
513
482
; X64-AVX512-NEXT: kunpckwd %k1, %k2, %k1
483
+ ; X64-AVX512-NEXT: kshiftrd $8, %k0, %k2
484
+ ; X64-AVX512-NEXT: kunpckbw %k0, %k2, %k2
485
+ ; X64-AVX512-NEXT: kshiftrd $16, %k0, %k3
486
+ ; X64-AVX512-NEXT: kshiftrd $24, %k0, %k0
487
+ ; X64-AVX512-NEXT: kunpckbw %k3, %k0, %k0
488
+ ; X64-AVX512-NEXT: kunpckwd %k2, %k0, %k0
514
489
; X64-AVX512-NEXT: kunpckdq %k0, %k1, %k0
515
490
; X64-AVX512-NEXT: vpmovm2b %k0, %zmm0
516
491
; X64-AVX512-NEXT: retq
517
492
;
518
493
; X64-KNL-LABEL: i64_mask_extract_64:
519
494
; X64-KNL: # %bb.0:
520
- ; X64-KNL-NEXT: pushq %rbx
521
- ; X64-KNL-NEXT: .cfi_def_cfa_offset 16
522
- ; X64-KNL-NEXT: .cfi_offset %rbx, -16
523
- ; X64-KNL-NEXT: movq %rsi, %rcx
524
495
; X64-KNL-NEXT: movq %rdi, %rax
525
- ; X64-KNL-NEXT: movl %ecx, %edx
526
- ; X64-KNL-NEXT: movq %rsi, %rdi
527
- ; X64-KNL-NEXT: movq %rsi, %r8
528
- ; X64-KNL-NEXT: movq %rsi, %r9
529
- ; X64-KNL-NEXT: kmovw %ecx, %k0
530
- ; X64-KNL-NEXT: movzbl %ch, %ebx
531
- ; X64-KNL-NEXT: # kill: def $ecx killed $ecx killed $rcx
532
- ; X64-KNL-NEXT: shrl $24, %ecx
496
+ ; X64-KNL-NEXT: kmovw %esi, %k0
497
+ ; X64-KNL-NEXT: movl %esi, %ecx
498
+ ; X64-KNL-NEXT: shrl $8, %ecx
499
+ ; X64-KNL-NEXT: kmovw %ecx, %k1
500
+ ; X64-KNL-NEXT: kunpckbw %k0, %k1, %k0
501
+ ; X64-KNL-NEXT: movl %esi, %ecx
502
+ ; X64-KNL-NEXT: shrl $16, %ecx
533
503
; X64-KNL-NEXT: kmovw %ecx, %k1
534
- ; X64-KNL-NEXT: shrl $16 , %edx
535
- ; X64-KNL-NEXT: movzbl %dl , %ecx
504
+ ; X64-KNL-NEXT: movl %esi , %ecx
505
+ ; X64-KNL-NEXT: shrl $24 , %ecx
536
506
; X64-KNL-NEXT: kmovw %ecx, %k2
537
- ; X64-KNL-NEXT: shrq $32, %rsi
538
- ; X64-KNL-NEXT: movzbl %sil, %ecx
507
+ ; X64-KNL-NEXT: kunpckbw %k1, %k2, %k1
508
+ ; X64-KNL-NEXT: movq %rsi, %rcx
509
+ ; X64-KNL-NEXT: shrq $32, %rcx
510
+ ; X64-KNL-NEXT: kmovw %ecx, %k2
511
+ ; X64-KNL-NEXT: movq %rsi, %rcx
512
+ ; X64-KNL-NEXT: shrq $40, %rcx
513
+ ; X64-KNL-NEXT: kmovw %ecx, %k3
514
+ ; X64-KNL-NEXT: kunpckbw %k2, %k3, %k2
515
+ ; X64-KNL-NEXT: movq %rsi, %rcx
516
+ ; X64-KNL-NEXT: shrq $48, %rcx
539
517
; X64-KNL-NEXT: kmovw %ecx, %k3
540
- ; X64-KNL-NEXT: shrq $40, %rdi
541
- ; X64-KNL-NEXT: movzbl %dil, %ecx
542
- ; X64-KNL-NEXT: kmovw %ecx, %k4
543
- ; X64-KNL-NEXT: kunpckbw %k2, %k1, %k1
544
- ; X64-KNL-NEXT: shrq $56, %r8
545
- ; X64-KNL-NEXT: kmovw %r8d, %k2
518
+ ; X64-KNL-NEXT: shrq $56, %rsi
519
+ ; X64-KNL-NEXT: kmovw %esi, %k4
546
520
; X64-KNL-NEXT: kunpckbw %k3, %k4, %k3
547
- ; X64-KNL-NEXT: shrq $48, %r9
548
- ; X64-KNL-NEXT: movzbl %r9b, %ecx
549
- ; X64-KNL-NEXT: kmovw %ecx, %k4
550
- ; X64-KNL-NEXT: kunpckbw %k4, %k2, %k2
551
- ; X64-KNL-NEXT: kmovw %ebx, %k4
552
- ; X64-KNL-NEXT: kunpckbw %k0, %k4, %k0
553
- ; X64-KNL-NEXT: kmovw %k0, (%rax)
554
- ; X64-KNL-NEXT: kmovw %k2, 6(%rax)
555
- ; X64-KNL-NEXT: kmovw %k3, 4(%rax)
556
- ; X64-KNL-NEXT: kmovw %k1, 2(%rax)
557
- ; X64-KNL-NEXT: popq %rbx
558
- ; X64-KNL-NEXT: .cfi_def_cfa_offset 8
521
+ ; X64-KNL-NEXT: kmovw %k3, 6(%rdi)
522
+ ; X64-KNL-NEXT: kmovw %k2, 4(%rdi)
523
+ ; X64-KNL-NEXT: kmovw %k1, 2(%rdi)
524
+ ; X64-KNL-NEXT: kmovw %k0, (%rdi)
559
525
; X64-KNL-NEXT: retq
560
526
%.splatinsert = insertelement <64 x i64 > poison, i64 %mask , i64 0
561
527
%.splat = shufflevector <64 x i64 > %.splatinsert , <64 x i64 > poison, <64 x i32 > zeroinitializer
0 commit comments