Skip to content

SelectionDAG: Support FMINIMUMNUM and FMINIMUM in combineMinNumMaxNumImpl #137449

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11516,9 +11516,18 @@ static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
case ISD::SETLE:
case ISD::SETULT:
case ISD::SETULE: {
// Since it's known never nan to get here already, either fminnum or
// fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
// expanded in terms of it.
// Since it's known never nan to get here already, either fminimumnum,
// fminimum, fminnum, or fminnum_ieee are OK. Try the ieee version first,
// since it's fminnum is expanded in terms of it.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain in the comment why IEEE2019Num and IEEE2019 are preferred in that order?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In some architectures FMINNUM is not defined well.
We are working on improve FMINNUM and finally remove FMINNUM_IEEE.
Currently, on some architectures that defines FMINNUM_IEEE , like AArch64/MipsR6, FMINNUM_IEEE is a better choice.

unsigned IEEE2019NumOpcode =
(LHS == True) ? ISD::FMINIMUMNUM : ISD::FMAXIMUMNUM;
if (TLI.isOperationLegal(IEEE2019NumOpcode, VT))
return DAG.getNode(IEEE2019NumOpcode, DL, VT, LHS, RHS);

unsigned IEEE2019Opcode = (LHS == True) ? ISD::FMINIMUM : ISD::FMAXIMUM;
if (TLI.isOperationLegal(IEEE2019Opcode, VT))
return DAG.getNode(IEEE2019Opcode, DL, VT, LHS, RHS);

unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
Expand All @@ -11534,6 +11543,15 @@ static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
case ISD::SETGE:
case ISD::SETUGT:
case ISD::SETUGE: {
unsigned IEEE2019NumOpcode =
(LHS == True) ? ISD::FMAXIMUMNUM : ISD::FMINIMUMNUM;
if (TLI.isOperationLegal(IEEE2019NumOpcode, VT))
return DAG.getNode(IEEE2019NumOpcode, DL, VT, LHS, RHS);

unsigned IEEE2019Opcode = (LHS == True) ? ISD::FMAXIMUM : ISD::FMINIMUM;
if (TLI.isOperationLegal(IEEE2019Opcode, VT))
return DAG.getNode(IEEE2019Opcode, DL, VT, LHS, RHS);

unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
Expand Down
42 changes: 30 additions & 12 deletions llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ define float @v_test_fmin_legacy_ule_f32_nnan_nsz_flag(float %a, float %b) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: v_minimum_f32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ule float %a, %b
%val = select nnan nsz i1 %cmp, float %a, float %b
Expand Down Expand Up @@ -236,7 +236,7 @@ define float @v_test_fmax_legacy_uge_f32_nnan_nsz_flag(float %a, float %b) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX12-NEXT: v_maximum_f32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp uge float %a, %b
%val = select nnan nsz i1 %cmp, float %a, float %b
Expand Down Expand Up @@ -693,7 +693,7 @@ define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) {
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l
; GFX12-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
Expand All @@ -703,7 +703,7 @@ define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) {
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1
; GFX12-FAKE16-NEXT: v_minimum_f16 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ule half %a, %b
%val = select nnan nsz i1 %cmp, half %a, half %b
Expand Down Expand Up @@ -872,7 +872,7 @@ define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) {
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v1.l
; GFX12-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
Expand All @@ -882,7 +882,7 @@ define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) {
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1
; GFX12-FAKE16-NEXT: v_maximum_f16 v0, v0, v1
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp uge half %a, %b
%val = select nnan nsz i1 %cmp, half %a, half %b
Expand Down Expand Up @@ -1122,7 +1122,7 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1
; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ule <2 x half> %a, %b
%val = select nnan nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
Expand Down Expand Up @@ -1362,7 +1362,7 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1
; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp uge <2 x half> %a, %b
%val = select nnan nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
Expand Down Expand Up @@ -1692,8 +1692,12 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
; GFX9-NEXT: v_pk_max_f16 v2, v3, v3
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: v_pk_min_f16 v1, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
Expand All @@ -1703,6 +1707,11 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2
; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
Expand Down Expand Up @@ -2034,8 +2043,12 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v2, v2, v2
; GFX9-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NEXT: v_pk_max_f16 v2, v3, v3
; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-NEXT: v_pk_max_f16 v1, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
Expand All @@ -2045,6 +2058,11 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0
; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2
; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
Expand Down Expand Up @@ -2079,7 +2097,7 @@ define float @v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1
; GFX12-NEXT: v_minimum_f32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
%a = fadd nnan float %arg0, %arg0
%b = fadd nnan float %arg1, %arg1
Expand Down Expand Up @@ -2114,7 +2132,7 @@ define float @v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1
; GFX12-NEXT: v_maximum_f32 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
%a = fadd nnan float %arg0, %arg0
%b = fadd nnan float %arg1, %arg1
Expand Down
31 changes: 11 additions & 20 deletions llvm/test/CodeGen/WebAssembly/f32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -229,13 +229,10 @@ define float @fminnum32_intrinsic(float %x, float %y) {
; CHECK-LABEL: fminnum32_intrinsic:
; CHECK: .functype fminnum32_intrinsic (f32, f32) -> (f32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get $push5=, 0
; CHECK-NEXT: local.get $push4=, 1
; CHECK-NEXT: local.get $push3=, 0
; CHECK-NEXT: local.get $push2=, 1
; CHECK-NEXT: f32.lt $push0=, $pop3, $pop2
; CHECK-NEXT: f32.select $push1=, $pop5, $pop4, $pop0
; CHECK-NEXT: return $pop1
; CHECK-NEXT: local.get $push2=, 0
; CHECK-NEXT: local.get $push1=, 1
; CHECK-NEXT: f32.min $push0=, $pop2, $pop1
Copy link
Member

@dschuff dschuff Apr 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sunfishcode
It looks like the existing code is incorrect: IIUC llvm.minnum is supposed to return the number if a normal number is compared against a qNaN, and a NaN (with an exception) if a normal number if is compared against a sNaN. Whereas the existing code will always return the LHS (NaN or otherwise) if either value is a NaN.

Because Wasm doesn't support FP exceptions and doesn't distinguish between sNaN and qNaN, implementing llvm.minnum directly with Wasm's f32.min would make it always return a NaN if either operand is a NaN; which also seems not quite right but is at least consistent? What is the behavior we want here? Is there precedent for how systems that don't support FPEs should treat qNaN vs sNaN?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, the existing code looks incorrect.

The new code here also looks incorrect; Wasm's f32.min corresponds to IEEE 754-2019's minimum, meaning it prefers the NaN, which is not correct for implementing llvm.minnum.

; CHECK-NEXT: return $pop0
%a = call nnan float @llvm.minnum.f32(float %x, float %y)
ret float %a
}
Expand Down Expand Up @@ -282,13 +279,10 @@ define float @fmaxnum32_intrinsic(float %x, float %y) {
; CHECK-LABEL: fmaxnum32_intrinsic:
; CHECK: .functype fmaxnum32_intrinsic (f32, f32) -> (f32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get $push5=, 0
; CHECK-NEXT: local.get $push4=, 1
; CHECK-NEXT: local.get $push3=, 0
; CHECK-NEXT: local.get $push2=, 1
; CHECK-NEXT: f32.gt $push0=, $pop3, $pop2
; CHECK-NEXT: f32.select $push1=, $pop5, $pop4, $pop0
; CHECK-NEXT: return $pop1
; CHECK-NEXT: local.get $push2=, 0
; CHECK-NEXT: local.get $push1=, 1
; CHECK-NEXT: f32.max $push0=, $pop2, $pop1
; CHECK-NEXT: return $pop0
%a = call nnan float @llvm.maxnum.f32(float %x, float %y)
ret float %a
}
Expand All @@ -309,13 +303,10 @@ define float @fmaxnum32_zero_intrinsic(float %x) {
; CHECK-LABEL: fmaxnum32_zero_intrinsic:
; CHECK: .functype fmaxnum32_zero_intrinsic (f32) -> (f32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get $push5=, 0
; CHECK-NEXT: local.get $push2=, 0
; CHECK-NEXT: f32.const $push0=, 0x0p0
; CHECK-NEXT: local.get $push4=, 0
; CHECK-NEXT: f32.const $push3=, 0x0p0
; CHECK-NEXT: f32.gt $push1=, $pop4, $pop3
; CHECK-NEXT: f32.select $push2=, $pop5, $pop0, $pop1
; CHECK-NEXT: return $pop2
; CHECK-NEXT: f32.max $push1=, $pop2, $pop0
; CHECK-NEXT: return $pop1
%a = call nnan float @llvm.maxnum.f32(float %x, float 0.0)
ret float %a
}
Expand Down
40 changes: 14 additions & 26 deletions llvm/test/CodeGen/WebAssembly/f64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -229,13 +229,10 @@ define double @fminnum64_intrinsic(double %x, double %y) {
; CHECK-LABEL: fminnum64_intrinsic:
; CHECK: .functype fminnum64_intrinsic (f64, f64) -> (f64)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get $push5=, 0
; CHECK-NEXT: local.get $push4=, 1
; CHECK-NEXT: local.get $push3=, 0
; CHECK-NEXT: local.get $push2=, 1
; CHECK-NEXT: f64.lt $push0=, $pop3, $pop2
; CHECK-NEXT: f64.select $push1=, $pop5, $pop4, $pop0
; CHECK-NEXT: return $pop1
; CHECK-NEXT: local.get $push2=, 0
; CHECK-NEXT: local.get $push1=, 1
; CHECK-NEXT: f64.min $push0=, $pop2, $pop1
; CHECK-NEXT: return $pop0
%a = call nnan double @llvm.minnum.f64(double %x, double %y)
ret double %a
}
Expand All @@ -256,13 +253,10 @@ define double @fminnum64_zero_intrinsic(double %x) {
; CHECK-LABEL: fminnum64_zero_intrinsic:
; CHECK: .functype fminnum64_zero_intrinsic (f64) -> (f64)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get $push5=, 0
; CHECK-NEXT: local.get $push2=, 0
; CHECK-NEXT: f64.const $push0=, -0x0p0
; CHECK-NEXT: local.get $push4=, 0
; CHECK-NEXT: f64.const $push3=, -0x0p0
; CHECK-NEXT: f64.lt $push1=, $pop4, $pop3
; CHECK-NEXT: f64.select $push2=, $pop5, $pop0, $pop1
; CHECK-NEXT: return $pop2
; CHECK-NEXT: f64.min $push1=, $pop2, $pop0
; CHECK-NEXT: return $pop1
%a = call nnan double @llvm.minnum.f64(double %x, double -0.0)
ret double %a
}
Expand Down Expand Up @@ -297,13 +291,10 @@ define double@fmaxnum64_intrinsic(double %x, double %y) {
; CHECK-LABEL: fmaxnum64_intrinsic:
; CHECK: .functype fmaxnum64_intrinsic (f64, f64) -> (f64)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get $push5=, 0
; CHECK-NEXT: local.get $push4=, 1
; CHECK-NEXT: local.get $push3=, 0
; CHECK-NEXT: local.get $push2=, 1
; CHECK-NEXT: f64.gt $push0=, $pop3, $pop2
; CHECK-NEXT: f64.select $push1=, $pop5, $pop4, $pop0
; CHECK-NEXT: return $pop1
; CHECK-NEXT: local.get $push2=, 0
; CHECK-NEXT: local.get $push1=, 1
; CHECK-NEXT: f64.max $push0=, $pop2, $pop1
; CHECK-NEXT: return $pop0
%a = call nnan double @llvm.maxnum.f64(double %x, double %y)
ret double %a
}
Expand All @@ -324,13 +315,10 @@ define double @fmaxnum64_zero_intrinsic(double %x) {
; CHECK-LABEL: fmaxnum64_zero_intrinsic:
; CHECK: .functype fmaxnum64_zero_intrinsic (f64) -> (f64)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get $push5=, 0
; CHECK-NEXT: local.get $push2=, 0
; CHECK-NEXT: f64.const $push0=, 0x0p0
; CHECK-NEXT: local.get $push4=, 0
; CHECK-NEXT: f64.const $push3=, 0x0p0
; CHECK-NEXT: f64.gt $push1=, $pop4, $pop3
; CHECK-NEXT: f64.select $push2=, $pop5, $pop0, $pop1
; CHECK-NEXT: return $pop2
; CHECK-NEXT: f64.max $push1=, $pop2, $pop0
; CHECK-NEXT: return $pop1
%a = call nnan double @llvm.maxnum.f64(double %x, double 0.0)
ret double %a
}
Expand Down
Loading
Loading