@@ -554,28 +554,14 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
554
554
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555
555
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
556
556
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
557
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
558
- ; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
557
+ ; GFX8-NEXT: v_max_f16_e32 v4, v3, v2
559
558
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
560
559
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
561
- ; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
562
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v3, 64
563
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
564
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v2, 64
565
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
566
- ; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v4
567
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
568
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
569
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
560
+ ; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
561
+ ; GFX8-NEXT: v_max_f16_e32 v3, v0, v1
570
562
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
571
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
572
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v0, 64
573
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
574
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v1, 64
575
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
576
- ; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
577
563
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
578
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
564
+ ; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
579
565
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
580
566
; GFX8-NEXT: s_setpc_b64 s[30:31]
581
567
;
@@ -674,26 +660,9 @@ define <2 x half> @v_maximum_v2f16__nnan(<2 x half> %src0, <2 x half> %src1) {
674
660
; GFX8-LABEL: v_maximum_v2f16__nnan:
675
661
; GFX8: ; %bb.0:
676
662
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
677
- ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
678
- ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
679
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
680
- ; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
681
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v3, 64
682
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
683
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v2, 64
684
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
685
- ; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v4
686
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
687
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
688
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
689
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v0, 64
690
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
691
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, v1, 64
692
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
693
- ; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
694
- ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
695
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
696
- ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
663
+ ; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
664
+ ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
665
+ ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
697
666
; GFX8-NEXT: s_setpc_b64 s[30:31]
698
667
;
699
668
; GFX9-LABEL: v_maximum_v2f16__nnan:
@@ -759,13 +728,11 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
759
728
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
760
729
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
761
730
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
762
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
763
- ; GFX8-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc
731
+ ; GFX8-NEXT: v_max_f16_e32 v4, v3, v2
764
732
; GFX8-NEXT: v_mov_b32_e32 v5, 0x7e00
765
733
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v3, v2
766
734
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc
767
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
768
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc
735
+ ; GFX8-NEXT: v_max_f16_e32 v3, v0, v1
769
736
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
770
737
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
771
738
; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
@@ -867,14 +834,9 @@ define <2 x half> @v_maximum_v2f16__nnan_nsz(<2 x half> %src0, <2 x half> %src1)
867
834
; GFX8-LABEL: v_maximum_v2f16__nnan_nsz:
868
835
; GFX8: ; %bb.0:
869
836
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
870
- ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
871
- ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
872
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v3, v2
873
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
874
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, v0, v1
875
- ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
876
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
877
- ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
837
+ ; GFX8-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
838
+ ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
839
+ ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
878
840
; GFX8-NEXT: s_setpc_b64 s[30:31]
879
841
;
880
842
; GFX9-LABEL: v_maximum_v2f16__nnan_nsz:
@@ -948,31 +910,15 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
948
910
; GFX8-NEXT: s_lshr_b32 s6, s5, 16
949
911
; GFX8-NEXT: s_lshr_b32 s7, s4, 16
950
912
; GFX8-NEXT: v_mov_b32_e32 v0, s6
951
- ; GFX8-NEXT: v_mov_b32_e32 v1, s7
952
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, s7, v0
953
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
954
- ; GFX8-NEXT: v_mov_b32_e32 v3, 0x7e00
913
+ ; GFX8-NEXT: v_max_f16_e32 v1, s7, v0
914
+ ; GFX8-NEXT: v_mov_b32_e32 v2, 0x7e00
955
915
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s7, v0
956
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
957
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s7, 64
958
- ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
959
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s6, 64
960
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
961
- ; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v2
916
+ ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
962
917
; GFX8-NEXT: v_mov_b32_e32 v1, s5
963
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
964
- ; GFX8-NEXT: v_mov_b32_e32 v2, s4
965
- ; GFX8-NEXT: v_cmp_gt_f16_e32 vcc, s4, v1
966
- ; GFX8-NEXT: v_cndmask_b32_e32 v4, v1, v2, vcc
918
+ ; GFX8-NEXT: v_max_f16_e32 v3, s4, v1
967
919
; GFX8-NEXT: v_cmp_o_f16_e32 vcc, s4, v1
968
- ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
969
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s4, 64
970
- ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
971
- ; GFX8-NEXT: v_cmp_class_f16_e64 vcc, s5, 64
972
- ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
973
- ; GFX8-NEXT: v_cmp_eq_f16_e32 vcc, 0, v3
974
920
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
975
- ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
921
+ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
976
922
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
977
923
; GFX8-NEXT: ;;#ASMSTART
978
924
; GFX8-NEXT: ; use v0
0 commit comments