@@ -410,3 +410,377 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
   store <4 x i32> %4, ptr %y, align 8
   ret void
 }
+
+define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i8i32_maxmin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: li a0, -128
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i32>, ptr %x, align 16
+  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
+  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
+  %4 = trunc <4 x i32> %3 to <4 x i8>
+  store <4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i8i32_minmax:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: li a0, -128
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i32>, ptr %x, align 16
+  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
+  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
+  %4 = trunc <4 x i32> %3 to <4 x i8>
+  store <4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u32_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i32>, ptr %x, align 16
+  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
+  %3 = trunc <4 x i32> %2 to <4 x i8>
+  store <4 x i8> %3, ptr %y, align 8
+  ret void
+}
+
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u32_maxmin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i32>, ptr %x, align 16
+  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> zeroinitializer)
+  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
+  %4 = trunc <4 x i32> %3 to <4 x i8>
+  store <4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u32_minmax:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i32>, ptr %x, align 16
+  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
+  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> zeroinitializer)
+  %4 = trunc <4 x i32> %3 to <4 x i8>
+  store <4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i8i64_maxmin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: li a0, -128
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i64>, ptr %x, align 16
+  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>)
+  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 127, i64 127, i64 127, i64 127>)
+  %4 = trunc <4 x i64> %3 to <4 x i8>
+  store <4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i8i64_minmax:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: li a0, -128
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i64>, ptr %x, align 16
+  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 127, i64 127, i64 127, i64 127>)
+  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>)
+  %4 = trunc <4 x i64> %3 to <4 x i8>
+  store <4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u64_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i64>, ptr %x, align 16
+  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
+  %3 = trunc <4 x i64> %2 to <4 x i8>
+  store <4 x i8> %3, ptr %y, align 8
+  ret void
+}
+
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u64_maxmin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i64>, ptr %x, align 16
+  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> zeroinitializer)
+  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
+  %4 = trunc <4 x i64> %3 to <4 x i8>
+  store <4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u8u64_minmax:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vmax.vx v8, v8, zero
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vse8.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i64>, ptr %x, align 16
+  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
+  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> zeroinitializer)
+  %4 = trunc <4 x i64> %3 to <4 x i8>
+  store <4 x i8> %4, ptr %y, align 8
+  ret void
+}
+
+define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i16i64_maxmin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i64>, ptr %x, align 32
+  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>)
+  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
+  %4 = trunc <4 x i64> %3 to <4 x i16>
+  store <4 x i16> %4, ptr %y, align 16
+  ret void
+}
+
+define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_i16i64_minmax:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i64>, ptr %x, align 32
+  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
+  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>)
+  %4 = trunc <4 x i64> %3 to <4 x i16>
+  store <4 x i16> %4, ptr %y, align 16
+  ret void
+}
+
+define void @trunc_sat_u16u64_notopt(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u64_notopt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i64>, ptr %x, align 32
+  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
+  %3 = trunc <4 x i64> %2 to <4 x i16>
+  store <4 x i16> %3, ptr %y, align 16
+  ret void
+}
+
+define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u64_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i64>, ptr %x, align 32
+  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
+  %3 = trunc <4 x i64> %2 to <4 x i16>
+  store <4 x i16> %3, ptr %y, align 16
+  ret void
+}
+
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u64_maxmin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i64>, ptr %x, align 16
+  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 1, i64 1, i64 1, i64 1>)
+  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
+  %4 = trunc <4 x i64> %3 to <4 x i16>
+  store <4 x i16> %4, ptr %y, align 8
+  ret void
+}
+
+; FIXME: This can be a signed vmax followed by vnclipu.
+define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
+; CHECK-LABEL: trunc_sat_u16u64_minmax:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: vmin.vx v8, v8, a0
+; CHECK-NEXT: li a0, 50
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+  %1 = load <4 x i64>, ptr %x, align 16
+  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
+  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 50, i64 50, i64 50, i64 50>)
+  %4 = trunc <4 x i64> %3 to <4 x i16>
+  store <4 x i16> %4, ptr %y, align 8
+  ret void
+}