Skip to content

Commit 038d357

Browse files
authored
AMDGPU: Use minimumnum/maximumnum for fmed3 with amdgpu-ieee=0 (#139546)
Try to respect the signaling NaN behavior of the instruction, so also start applying the special-case fold for src2.
1 parent b2cd40d commit 038d357

File tree

2 files changed

+391
-118
lines changed

2 files changed

+391
-118
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 97 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,28 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
5959
return maxnum(Src0, Src1);
6060
}
6161

62+
/// Tri-state answer to whether the "amdgpu-ieee" mode may be assumed for a
/// given use context.
enum class KnownIEEEMode {
  Unknown, ///< Neither setting may be assumed.
  On,      ///< "amdgpu-ieee"="true" may be assumed.
  Off      ///< "amdgpu-ieee"="false" may be assumed.
};
63+
64+
/// Return KnownIEEEMode::On if we know if the use context can assume
65+
/// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume
66+
/// "amdgpu-ieee"="false".
67+
static KnownIEEEMode fpenvIEEEMode(const Instruction &I,
68+
const GCNSubtarget &ST) {
69+
if (!ST.hasIEEEMode()) // Only mode on gfx12
70+
return KnownIEEEMode::On;
71+
72+
const Function *F = I.getFunction();
73+
if (!F)
74+
return KnownIEEEMode::Unknown;
75+
76+
Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee");
77+
if (IEEEAttr.isValid())
78+
return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off;
79+
80+
return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off
81+
: KnownIEEEMode::On;
82+
}
83+
6284
// Check if a value can be converted to a 16-bit value without losing
6385
// precision.
6486
// The value is expected to be either a float (IsFloat = true) or an unsigned
@@ -843,9 +865,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
843865
break;
844866
}
845867
case Intrinsic::amdgcn_fmed3: {
846-
// Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
847-
// for the shader.
848-
849868
Value *Src0 = II.getArgOperand(0);
850869
Value *Src1 = II.getArgOperand(1);
851870
Value *Src2 = II.getArgOperand(2);
@@ -858,16 +877,85 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
858877
if (II.isStrictFP())
859878
break;
860879

880+
// med3 with a nan input acts like
881+
// v_min_f32(v_min_f32(s0, s1), s2)
882+
//
883+
// Signalingness is ignored with ieee=0, so we fold to
884+
// minimumnum/maximumnum. With ieee=1, the v_min_f32 acts like llvm.minnum
885+
// with signaling nan handling. With ieee=0, like llvm.minimumnum except a
886+
// returned signaling nan will not be quieted.
887+
888+
// ieee=1
889+
// s0 snan: s2
890+
// s1 snan: s2
891+
// s2 snan: qnan
892+
893+
// s0 qnan: min(s1, s2)
894+
// s1 qnan: min(s0, s2)
895+
// s2 qnan: min(s0, s1)
896+
897+
// ieee=0
898+
// s0 _nan: min(s1, s2)
899+
// s1 _nan: min(s0, s2)
900+
// s2 _nan: min(s0, s1)
901+
861902
// Checking for NaN before canonicalization provides better fidelity when
862903
// mapping other operations onto fmed3 since the order of operands is
863904
// unchanged.
864905
Value *V = nullptr;
865-
if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
866-
V = IC.Builder.CreateMinNum(Src1, Src2);
867-
} else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
868-
V = IC.Builder.CreateMinNum(Src0, Src2);
869-
} else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
870-
V = IC.Builder.CreateMinNum(Src0, Src1);
906+
const APFloat *ConstSrc0 = nullptr;
907+
const APFloat *ConstSrc1 = nullptr;
908+
const APFloat *ConstSrc2 = nullptr;
909+
910+
// TODO: Also can fold to 2 operands with infinities.
911+
if ((match(Src0, m_APFloat(ConstSrc0)) && ConstSrc0->isNaN()) ||
912+
isa<UndefValue>(Src0)) {
913+
switch (fpenvIEEEMode(II, *ST)) {
914+
case KnownIEEEMode::On:
915+
// TODO: If Src2 is snan, does it need quieting?
916+
if (ConstSrc0 && ConstSrc0->isSignaling())
917+
return IC.replaceInstUsesWith(II, Src2);
918+
V = IC.Builder.CreateMinNum(Src1, Src2);
919+
break;
920+
case KnownIEEEMode::Off:
921+
V = IC.Builder.CreateMinimumNum(Src1, Src2);
922+
break;
923+
case KnownIEEEMode::Unknown:
924+
break;
925+
}
926+
} else if ((match(Src1, m_APFloat(ConstSrc1)) && ConstSrc1->isNaN()) ||
927+
isa<UndefValue>(Src1)) {
928+
switch (fpenvIEEEMode(II, *ST)) {
929+
case KnownIEEEMode::On:
930+
// TODO: If Src2 is snan, does it need quieting?
931+
if (ConstSrc1 && ConstSrc1->isSignaling())
932+
return IC.replaceInstUsesWith(II, Src2);
933+
934+
V = IC.Builder.CreateMinNum(Src0, Src2);
935+
break;
936+
case KnownIEEEMode::Off:
937+
V = IC.Builder.CreateMinimumNum(Src0, Src2);
938+
break;
939+
case KnownIEEEMode::Unknown:
940+
break;
941+
}
942+
} else if ((match(Src2, m_APFloat(ConstSrc2)) && ConstSrc2->isNaN()) ||
943+
isa<UndefValue>(Src2)) {
944+
switch (fpenvIEEEMode(II, *ST)) {
945+
case KnownIEEEMode::On:
946+
if (ConstSrc2 && ConstSrc2->isSignaling()) {
947+
auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet());
948+
return IC.replaceInstUsesWith(II, Quieted);
949+
}
950+
951+
V = IC.Builder.CreateMinNum(Src0, Src1);
952+
break;
953+
case KnownIEEEMode::Off:
954+
V = IC.Builder.CreateMaximumNum(Src0, Src1);
955+
break;
956+
case KnownIEEEMode::Unknown:
957+
break;
958+
}
871959
}
872960

873961
if (V) {

0 commit comments

Comments
 (0)