Commit c0873fa (parent: 8a8cd8a)

[RISCV] Add trunc-sat-clip tests for i32->i8, i64->i8, and i64->i16. NFC

These can be implemented with multiple vnclips.
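
As an illustration of the commit message's claim (a sketch, not output from this commit): the signed i32->i8 clip pattern could, in principle, lower to two saturating narrows instead of the vmax/vmin/vnsrl sequences checked below. Register choices and vsetvli settings here are assumed:

    # assuming v8 already holds 4 x i32 under e32/m1, as in the tests below
    vsetvli zero, zero, e16, mf2, ta, ma
    vnclip.wi v8, v8, 0    # i32 -> i16, saturating to [-32768, 32767]
    vsetvli zero, zero, e8, mf4, ta, ma
    vnclip.wi v8, v8, 0    # i16 -> i8, saturating to [-128, 127]

With a shift amount of zero no bits are discarded, so the vxrm rounding mode does not affect the result, and the two clips compose to the final [-128, 127] range.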

File tree: 2 files changed, +727 −0 lines changed
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll

Lines changed: 374 additions & 0 deletions
@@ -410,3 +410,377 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
  store <4 x i32> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a0, -128
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 127
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a0, 127
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    li a0, -128
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a0, 255
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
  %3 = trunc <4 x i32> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    li a0, 255
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %2, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}
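
; NOTE (illustrative sketch, not generated output): the lowering suggested by
; the FIXME above might look like the following, with the unsigned upper bound
; folded into the saturating narrows; register choices and the starting e32/m1
; configuration are assumed:
;   vmax.vx    v8, v8, zero                  ; signed clamp: negatives become 0
;   vsetvli    zero, zero, e16, mf2, ta, ma
;   vnclipu.wi v8, v8, 0                     ; u32 -> u16, saturates at 65535
;   vsetvli    zero, zero, e8, mf4, ta, ma
;   vnclipu.wi v8, v8, 0                     ; u16 -> u8, saturates at 255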

; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u32_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a0, 255
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %x, align 16
  %2 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
  %3 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %2, <4 x i32> zeroinitializer)
  %4 = trunc <4 x i32> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i8i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i8i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a0, -128
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    li a0, 127
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 127, i64 127, i64 127, i64 127>)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}
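
; NOTE (illustrative): the signed i64 -> i8 clip could likewise narrow with
; three vnclip.wi steps (i64 -> i32 -> i16 -> i8), each saturating to the
; narrower signed range, in place of the vmax/vmin plus vnsrl chain above.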
550+
551+
define void @trunc_sat_i8i64_minmax(ptr %x, ptr %y) {
552+
; CHECK-LABEL: trunc_sat_i8i64_minmax:
553+
; CHECK: # %bb.0:
554+
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
555+
; CHECK-NEXT: vle64.v v8, (a0)
556+
; CHECK-NEXT: li a0, 127
557+
; CHECK-NEXT: vmin.vx v8, v8, a0
558+
; CHECK-NEXT: li a0, -128
559+
; CHECK-NEXT: vmax.vx v8, v8, a0
560+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
561+
; CHECK-NEXT: vnsrl.wi v10, v8, 0
562+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
563+
; CHECK-NEXT: vnsrl.wi v8, v10, 0
564+
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
565+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
566+
; CHECK-NEXT: vse8.v v8, (a1)
567+
; CHECK-NEXT: ret
568+
%1 = load <4 x i64>, ptr %x, align 16
569+
%2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 127, i64 127, i64 127, i64 127>)
570+
%3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128>)
571+
%4 = trunc <4 x i64> %3 to <4 x i8>
572+
store <4 x i8> %4, ptr %y, align 8
573+
ret void
574+
}

define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a0, 255
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
  %3 = trunc <4 x i64> %2 to <4 x i8>
  store <4 x i8> %3, ptr %y, align 8
  ret void
}

; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    li a0, 255
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> zeroinitializer)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}
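
; NOTE (illustrative): the same idea applies here with one extra narrowing
; step: vmax.vx v8, v8, zero followed by three vnclipu.wi narrows
; (u64 -> u32 -> u16 -> u8), each saturating at the narrower type's maximum.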

; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u8u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a0, 255
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vmax.vx v8, v8, zero
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v8, 0
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 255, i64 255, i64 255, i64 255>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> zeroinitializer)
  %4 = trunc <4 x i64> %3 to <4 x i8>
  store <4 x i8> %4, ptr %y, align 8
  ret void
}

define void @trunc_sat_i16i64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 1048568
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_i16i64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_i16i64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1048568
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 16
  ret void
}

define void @trunc_sat_u16u64_notopt(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_notopt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 8
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767>)
  %3 = trunc <4 x i64> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}
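
; (Presumably "notopt" because the umin bound above is 32767 rather than
; 65535, so it does not match the full u16 saturation range a vnclipu gives.)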

define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_min:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vminu.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 32
  %2 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %1, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
  %3 = trunc <4 x i64> %2 to <4 x i16>
  store <4 x i16> %3, ptr %y, align 16
  ret void
}

; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_maxmin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %1, <4 x i64> <i64 1, i64 1, i64 1, i64 1>)
  %3 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %2, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}
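
; NOTE (illustrative): a lower bound of 1 (or 50 below) still leaves the value
; non-negative, so the signed vmax could be kept as-is and the umin with 65535
; folded into two vnclipu.wi narrows (u64 -> u32 -> u16).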

; FIXME: This can be a signed vmax followed by vnclipu.
define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
; CHECK-LABEL: trunc_sat_u16u64_minmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a0, 16
; CHECK-NEXT:    addiw a0, a0, -1
; CHECK-NEXT:    vmin.vx v8, v8, a0
; CHECK-NEXT:    li a0, 50
; CHECK-NEXT:    vmax.vx v8, v8, a0
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vnsrl.wi v10, v8, 0
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vnsrl.wi v8, v10, 0
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %1 = load <4 x i64>, ptr %x, align 16
  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>)
  %3 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %2, <4 x i64> <i64 50, i64 50, i64 50, i64 50>)
  %4 = trunc <4 x i64> %3 to <4 x i16>
  store <4 x i16> %4, ptr %y, align 8
  ret void
}
