Skip to content

Commit e1481f7

Browse files
committed
Check fast math related flags
1 parent db148eb commit e1481f7

File tree

2 files changed

+17
-9
lines changed

2 files changed

+17
-9
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17564,7 +17564,8 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
1756417564
}
1756517565

1756617566
// FIXME: use fast math flags instead of Options.UnsafeFPMath
17567-
if (Options.UnsafeFPMath) {
17567+
if (Options.AllowFPOpFusion == FPOpFusion::Fast ||
17568+
Options.NoSignedZerosFPMath) {
1756817569
if (N0CFP && N0CFP->isZero())
1756917570
return N2;
1757017571
if (N1CFP && N1CFP->isZero())

llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-SAFE %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-SAFE %s
33
; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-NSZ %s
44

55
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-SAFE %s
@@ -623,10 +623,13 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
623623
;
624624
; VI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
625625
; VI-SAFE: ; %bb.0: ; %.entry
626-
; VI-SAFE-NEXT: v_mov_b32_e32 v0, 0x8000
626+
; VI-SAFE-NEXT: v_rcp_f16_e32 v0, s1
627627
; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
628-
; VI-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc, s0, 0
629-
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
628+
; VI-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
629+
; VI-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
630+
; VI-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v0
631+
; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
632+
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
630633
; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
631634
; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
632635
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
@@ -646,12 +649,16 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
646649
;
647650
; GFX11-SAFE-LABEL: fneg_fadd_0_nsz_f16:
648651
; GFX11-SAFE: ; %bb.0: ; %.entry
649-
; GFX11-SAFE-NEXT: v_mov_b32_e32 v0, s0
650-
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc_lo, s0, 0
652+
; GFX11-SAFE-NEXT: v_rcp_f16_e32 v0, s1
653+
; GFX11-SAFE-NEXT: s_waitcnt_depctr 0xfff
654+
; GFX11-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
655+
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
656+
; GFX11-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
657+
; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v0
658+
; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
651659
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
652-
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
660+
; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
653661
; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
654-
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3)
655662
; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
656663
; GFX11-SAFE-NEXT: ; return to shader part epilog
657664
;

0 commit comments

Comments
 (0)