@@ -618,16 +618,16 @@ define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(ptr addrsp
618
618
define amdgpu_kernel void @test_isinf_pattern_f16 (ptr addrspace (1 ) nocapture %out , half %x ) #0 {
619
619
; SI-LABEL: test_isinf_pattern_f16:
620
620
; SI: ; %bb.0:
621
- ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
622
- ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
623
- ; SI-NEXT: s_mov_b32 s7, 0xf000
624
- ; SI-NEXT: s_mov_b32 s6, -1
625
- ; SI-NEXT: s_mov_b32 s1, 0x7f800000
621
+ ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
622
+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
623
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
624
+ ; SI-NEXT: s_mov_b32 s2, -1
626
625
; SI-NEXT: s_waitcnt lgkmcnt(0)
627
- ; SI-NEXT: v_cvt_f32_f16_e64 v0, |s0|
628
- ; SI-NEXT: v_cmp_eq_f32_e32 vcc, s1, v0
629
- ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
630
- ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
626
+ ; SI-NEXT: s_and_b32 s4, s4, 0x7fff
627
+ ; SI-NEXT: s_cmpk_eq_i32 s4, 0x7c00
628
+ ; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
629
+ ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
630
+ ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
631
631
; SI-NEXT: s_endpgm
632
632
;
633
633
; VI-LABEL: test_isinf_pattern_f16:
@@ -667,16 +667,19 @@ define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %ou
667
667
define amdgpu_kernel void @test_isfinite_pattern_0_f16 (ptr addrspace (1 ) nocapture %out , half %x ) #0 {
668
668
; SI-LABEL: test_isfinite_pattern_0_f16:
669
669
; SI: ; %bb.0:
670
- ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
671
- ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
672
- ; SI-NEXT: s_mov_b32 s7, 0xf000
673
- ; SI-NEXT: s_mov_b32 s6, -1
674
- ; SI-NEXT: s_movk_i32 s1, 0x1f8
670
+ ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
671
+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
672
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
673
+ ; SI-NEXT: s_mov_b32 s2, -1
675
674
; SI-NEXT: s_waitcnt lgkmcnt(0)
676
- ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
677
- ; SI-NEXT: v_cmp_class_f32_e64 s[0:1], v0, s1
678
- ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
679
- ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
675
+ ; SI-NEXT: v_cvt_f32_f16_e32 v0, s4
676
+ ; SI-NEXT: s_and_b32 s4, s4, 0x7fff
677
+ ; SI-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
678
+ ; SI-NEXT: s_cmpk_lg_i32 s4, 0x7c00
679
+ ; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
680
+ ; SI-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
681
+ ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
682
+ ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
680
683
; SI-NEXT: s_endpgm
681
684
;
682
685
; VI-LABEL: test_isfinite_pattern_0_f16:
@@ -718,16 +721,19 @@ define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocaptur
718
721
define amdgpu_kernel void @test_isfinite_pattern_4_f16 (ptr addrspace (1 ) nocapture %out , half %x ) #0 {
719
722
; SI-LABEL: test_isfinite_pattern_4_f16:
720
723
; SI: ; %bb.0:
721
- ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
722
- ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
723
- ; SI-NEXT: s_mov_b32 s7, 0xf000
724
- ; SI-NEXT: s_mov_b32 s6, -1
725
- ; SI-NEXT: s_movk_i32 s1, 0x1f8
724
+ ; SI-NEXT: s_load_dword s4, s[0:1], 0xb
725
+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
726
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
727
+ ; SI-NEXT: s_mov_b32 s2, -1
726
728
; SI-NEXT: s_waitcnt lgkmcnt(0)
727
- ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
728
- ; SI-NEXT: v_cmp_class_f32_e64 s[0:1], v0, s1
729
- ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
730
- ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
729
+ ; SI-NEXT: v_cvt_f32_f16_e32 v0, s4
730
+ ; SI-NEXT: s_and_b32 s4, s4, 0x7fff
731
+ ; SI-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
732
+ ; SI-NEXT: s_cmpk_lt_i32 s4, 0x7c00
733
+ ; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
734
+ ; SI-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
735
+ ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
736
+ ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
731
737
; SI-NEXT: s_endpgm
732
738
;
733
739
; VI-LABEL: test_isfinite_pattern_4_f16:
0 commit comments