@@ -59,6 +59,28 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
59
59
return maxnum (Src0, Src1);
60
60
}
61
61
62
+ enum class KnownIEEEMode { Unknown, On, Off };
63
+
64
+ // / Return KnownIEEEMode::On if we know if the use context can assume
65
+ // / "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume
66
+ // / "amdgpu-ieee"="false".
67
+ static KnownIEEEMode fpenvIEEEMode (const Instruction &I,
68
+ const GCNSubtarget &ST) {
69
+ if (!ST.hasIEEEMode ()) // Only mode on gfx12
70
+ return KnownIEEEMode::On;
71
+
72
+ const Function *F = I.getFunction ();
73
+ if (!F)
74
+ return KnownIEEEMode::Unknown;
75
+
76
+ Attribute IEEEAttr = F->getFnAttribute (" amdgpu-ieee" );
77
+ if (IEEEAttr.isValid ())
78
+ return IEEEAttr.getValueAsBool () ? KnownIEEEMode::On : KnownIEEEMode::Off;
79
+
80
+ return AMDGPU::isShader (F->getCallingConv ()) ? KnownIEEEMode::Off
81
+ : KnownIEEEMode::On;
82
+ }
83
+
62
84
// Check if a value can be converted to a 16-bit value without losing
63
85
// precision.
64
86
// The value is expected to be either a float (IsFloat = true) or an unsigned
@@ -843,9 +865,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
843
865
break ;
844
866
}
845
867
case Intrinsic::amdgcn_fmed3: {
846
- // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
847
- // for the shader.
848
-
849
868
Value *Src0 = II.getArgOperand (0 );
850
869
Value *Src1 = II.getArgOperand (1 );
851
870
Value *Src2 = II.getArgOperand (2 );
@@ -858,16 +877,85 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
858
877
if (II.isStrictFP ())
859
878
break ;
860
879
880
+ // med3 with a nan input acts like
881
+ // v_min_f32(v_min_f32(s0, s1), s2)
882
+ //
883
+ // Signalingness is ignored with ieee=0, so we fold to
884
+ // minimumnum/maximumnum. With ieee=1, the v_min_f32 acts like llvm.minnum
885
+ // with signaling nan handling. With ieee=0, like llvm.minimumnum except a
886
+ // returned signaling nan will not be quieted.
887
+
888
+ // ieee=1
889
+ // s0 snan: s2
890
+ // s1 snan: s2
891
+ // s2 snan: qnan
892
+
893
+ // s0 qnan: min(s1, s2)
894
+ // s1 qnan: min(s0, s2)
895
+ // s2 qnan: min(s0, s1)
896
+
897
+ // ieee=0
898
+ // s0 _nan: min(s1, s2)
899
+ // s1 _nan: min(s0, s2)
900
+ // s2 _nan: min(s0, s1)
901
+
861
902
// Checking for NaN before canonicalization provides better fidelity when
862
903
// mapping other operations onto fmed3 since the order of operands is
863
904
// unchanged.
864
905
Value *V = nullptr ;
865
- if (match (Src0, PatternMatch::m_NaN ()) || isa<UndefValue>(Src0)) {
866
- V = IC.Builder .CreateMinNum (Src1, Src2);
867
- } else if (match (Src1, PatternMatch::m_NaN ()) || isa<UndefValue>(Src1)) {
868
- V = IC.Builder .CreateMinNum (Src0, Src2);
869
- } else if (match (Src2, PatternMatch::m_NaN ()) || isa<UndefValue>(Src2)) {
870
- V = IC.Builder .CreateMinNum (Src0, Src1);
906
+ const APFloat *ConstSrc0 = nullptr ;
907
+ const APFloat *ConstSrc1 = nullptr ;
908
+ const APFloat *ConstSrc2 = nullptr ;
909
+
910
+ // TODO: Also can fold to 2 operands with infinities.
911
+ if ((match (Src0, m_APFloat (ConstSrc0)) && ConstSrc0->isNaN ()) ||
912
+ isa<UndefValue>(Src0)) {
913
+ switch (fpenvIEEEMode (II, *ST)) {
914
+ case KnownIEEEMode::On:
915
+ // TODO: If Src2 is snan, does it need quieting?
916
+ if (ConstSrc0 && ConstSrc0->isSignaling ())
917
+ return IC.replaceInstUsesWith (II, Src2);
918
+ V = IC.Builder .CreateMinNum (Src1, Src2);
919
+ break ;
920
+ case KnownIEEEMode::Off:
921
+ V = IC.Builder .CreateMinimumNum (Src1, Src2);
922
+ break ;
923
+ case KnownIEEEMode::Unknown:
924
+ break ;
925
+ }
926
+ } else if ((match (Src1, m_APFloat (ConstSrc1)) && ConstSrc1->isNaN ()) ||
927
+ isa<UndefValue>(Src1)) {
928
+ switch (fpenvIEEEMode (II, *ST)) {
929
+ case KnownIEEEMode::On:
930
+ // TODO: If Src2 is snan, does it need quieting?
931
+ if (ConstSrc1 && ConstSrc1->isSignaling ())
932
+ return IC.replaceInstUsesWith (II, Src2);
933
+
934
+ V = IC.Builder .CreateMinNum (Src0, Src2);
935
+ break ;
936
+ case KnownIEEEMode::Off:
937
+ V = IC.Builder .CreateMinimumNum (Src0, Src2);
938
+ break ;
939
+ case KnownIEEEMode::Unknown:
940
+ break ;
941
+ }
942
+ } else if ((match (Src2, m_APFloat (ConstSrc2)) && ConstSrc2->isNaN ()) ||
943
+ isa<UndefValue>(Src2)) {
944
+ switch (fpenvIEEEMode (II, *ST)) {
945
+ case KnownIEEEMode::On:
946
+ if (ConstSrc2 && ConstSrc2->isSignaling ()) {
947
+ auto *Quieted = ConstantFP::get (II.getType (), ConstSrc2->makeQuiet ());
948
+ return IC.replaceInstUsesWith (II, Quieted);
949
+ }
950
+
951
+ V = IC.Builder .CreateMinNum (Src0, Src1);
952
+ break ;
953
+ case KnownIEEEMode::Off:
954
+ V = IC.Builder .CreateMaximumNum (Src0, Src1);
955
+ break ;
956
+ case KnownIEEEMode::Unknown:
957
+ break ;
958
+ }
871
959
}
872
960
873
961
if (V) {
0 commit comments