@@ -554,28 +554,14 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
554
554
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555
555
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
556
556
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
557
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
558
- ; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
557
+ ; GFX8-NEXT: v_max_f16_e32 v4, v3, v2
559
558
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
560
559
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
561
- ; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
562
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v3, 64
563
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
564
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v2, 64
565
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
566
- ; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v4
567
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
568
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
569
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
560
+ ; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
561
+ ; GFX8-NEXT: v_max_f16_e32 v3, v0, v1
570
562
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
571
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
572
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v0, 64
573
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
574
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v1, 64
575
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
576
- ; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
577
563
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
578
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
564
+ ; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
579
565
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
580
566
; GFX8-NEXT: s_setpc_b64 s[30:31]
581
567
;
@@ -669,26 +655,9 @@ define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
669
655
; GFX8-LABEL: v_maximum_v2f16__nnan:
670
656
; GFX8: ; %bb.0:
671
657
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
672
- ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
673
- ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
674
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
675
- ; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
676
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v3, 64
677
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
678
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v2, 64
679
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
680
- ; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v4
681
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
682
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
683
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
684
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v0, 64
685
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
686
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v1, 64
687
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
688
- ; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
689
- ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
690
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
691
- ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
658
+ ; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
659
+ ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
660
+ ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
692
661
; GFX8-NEXT: s_setpc_b64 s[30:31]
693
662
;
694
663
; GFX9-LABEL: v_maximum_v2f16__nnan:
@@ -754,13 +723,11 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
754
723
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
755
724
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
756
725
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
757
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
758
- ; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
726
+ ; GFX8-NEXT: v_max_f16_e32 v4, v3, v2
759
727
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
760
728
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
761
729
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
762
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
763
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
730
+ ; GFX8-NEXT: v_max_f16_e32 v3, v0, v1
764
731
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
765
732
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
766
733
; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
@@ -857,14 +824,9 @@ define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
857
824
; GFX8-LABEL: v_maximum_v2f16__nnan_nsz:
858
825
; GFX8: ; %bb.0:
859
826
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
860
- ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
861
- ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
862
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
863
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
864
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
865
- ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
866
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
867
- ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
827
+ ; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
828
+ ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
829
+ ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
868
830
; GFX8-NEXT: s_setpc_b64 s[30:31]
869
831
;
870
832
; GFX9-LABEL: v_maximum_v2f16__nnan_nsz:
@@ -938,31 +900,15 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
938
900
; GFX8-NEXT: s_lshr_b32 s6, s5, 16
939
901
; GFX8-NEXT: s_lshr_b32 s7, s4, 16
940
902
; GFX8-NEXT: v_mov_b32_e32 v0, s6
941
- ; GFX8-NEXT: v_mov_b32_e32 v1, s7
942
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, s7, v0
943
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
944
- ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
903
+ ; GFX8-NEXT: v_max_f16_e32 v1, s7, v0
904
+ ; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00
945
905
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s7, v0
946
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
947
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s7, 64
948
- ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
949
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s6, 64
950
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
951
- ; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v2
906
+ ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
952
907
; GFX8-NEXT: v_mov_b32_e32 v1, s5
953
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
954
- ; GFX8-NEXT: v_mov_b32_e32 v2, s4
955
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, s4, v1
956
- ; GFX8-NEXT: v_cndmask_b32_e32 v4, v1, v2, vcc
908
+ ; GFX8-NEXT: v_max_f16_e32 v3, s4, v1
957
909
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s4, v1
958
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
959
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s4, 64
960
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
961
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s5, 64
962
- ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
963
- ; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
964
910
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
965
- ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
911
+ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
966
912
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
967
913
; GFX8-NEXT: ;;#ASMSTART
968
914
; GFX8-NEXT: ; use v0
0 commit comments