1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-SAFE %s
2
+ ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-SAFE %s
3
3
; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-NSZ %s
4
4
5
5
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-SAFE %s
@@ -623,10 +623,13 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
623
623
;
624
624
; VI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
625
625
; VI-SAFE: ; %bb.0: ; %.entry
626
- ; VI-SAFE-NEXT: v_mov_b32_e32 v0, 0x8000
626
+ ; VI-SAFE-NEXT: v_rcp_f16_e32 v0, s1
627
627
; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
628
- ; VI-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc, s0, 0
629
- ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
628
+ ; VI-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
629
+ ; VI-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
630
+ ; VI-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v0
631
+ ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
632
+ ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
630
633
; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
631
634
; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
632
635
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
@@ -646,12 +649,16 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
646
649
;
647
650
; GFX11-SAFE-LABEL: fneg_fadd_0_nsz_f16:
648
651
; GFX11-SAFE: ; %bb.0: ; %.entry
649
- ; GFX11-SAFE-NEXT: v_mov_b32_e32 v0, s0
650
- ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc_lo, s0, 0
652
+ ; GFX11-SAFE-NEXT: v_rcp_f16_e32 v0, s1
653
+ ; GFX11-SAFE-NEXT: s_waitcnt_depctr 0xfff
654
+ ; GFX11-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
655
+ ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
656
+ ; GFX11-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
657
+ ; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v0
658
+ ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
651
659
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
652
- ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
660
+ ; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
653
661
; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
654
- ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3)
655
662
; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
656
663
; GFX11-SAFE-NEXT: ; return to shader part epilog
657
664
;
0 commit comments