@@ -2141,6 +2141,8 @@ TEST(APFloatTest, getZero) {
2141
2141
{&APFloat::Float8E4M3FNUZ (), true , false , {0 , 0 }, 1 },
2142
2142
{&APFloat::Float8E4M3B11FNUZ (), false , false , {0 , 0 }, 1 },
2143
2143
{&APFloat::Float8E4M3B11FNUZ (), true , false , {0 , 0 }, 1 },
2144
+ {&APFloat::Float8E3M4 (), false , true , {0 , 0 }, 1 },
2145
+ {&APFloat::Float8E3M4 (), true , true , {0x80ULL , 0 }, 1 },
2144
2146
{&APFloat::FloatTF32 (), false , true , {0 , 0 }, 1 },
2145
2147
{&APFloat::FloatTF32 (), true , true , {0x40000ULL , 0 }, 1 },
2146
2148
{&APFloat::Float6E3M2FN (), false , true , {0 , 0 }, 1 },
@@ -6636,6 +6638,45 @@ TEST(APFloatTest, Float8E4M3FNUZToDouble) {
6636
6638
EXPECT_TRUE (std::isnan (QNaN.convertToDouble ()));
6637
6639
}
6638
6640
6641
+ TEST (APFloatTest, Float8E3M4ToDouble) {
6642
+ APFloat PosZero = APFloat::getZero (APFloat::Float8E3M4 (), false );
6643
+ APFloat PosZeroToDouble (PosZero.convertToDouble ());
6644
+ EXPECT_TRUE (PosZeroToDouble.isPosZero ());
6645
+ APFloat NegZero = APFloat::getZero (APFloat::Float8E3M4 (), true );
6646
+ APFloat NegZeroToDouble (NegZero.convertToDouble ());
6647
+ EXPECT_TRUE (NegZeroToDouble.isNegZero ());
6648
+
6649
+ APFloat One (APFloat::Float8E3M4 (), " 1.0" );
6650
+ EXPECT_EQ (1.0 , One.convertToDouble ());
6651
+ APFloat Two (APFloat::Float8E3M4 (), " 2.0" );
6652
+ EXPECT_EQ (2.0 , Two.convertToDouble ());
6653
+ APFloat PosLargest = APFloat::getLargest (APFloat::Float8E3M4 (), false );
6654
+ EXPECT_EQ (15 .5F , PosLargest.convertToDouble ());
6655
+ APFloat NegLargest = APFloat::getLargest (APFloat::Float8E3M4 (), true );
6656
+ EXPECT_EQ (-15 .5F , NegLargest.convertToDouble ());
6657
+ APFloat PosSmallest =
6658
+ APFloat::getSmallestNormalized (APFloat::Float8E3M4 (), false );
6659
+ EXPECT_EQ (0x1 .p -2 , PosSmallest.convertToDouble ());
6660
+ APFloat NegSmallest =
6661
+ APFloat::getSmallestNormalized (APFloat::Float8E3M4 (), true );
6662
+ EXPECT_EQ (-0x1 .p -2 , NegSmallest.convertToDouble ());
6663
+
6664
+ APFloat PosSmallestDenorm =
6665
+ APFloat::getSmallest (APFloat::Float8E3M4 (), false );
6666
+ EXPECT_TRUE (PosSmallestDenorm.isDenormal ());
6667
+ EXPECT_EQ (0x1 .p -6 , PosSmallestDenorm.convertToDouble ());
6668
+ APFloat NegSmallestDenorm = APFloat::getSmallest (APFloat::Float8E3M4 (), true );
6669
+ EXPECT_TRUE (NegSmallestDenorm.isDenormal ());
6670
+ EXPECT_EQ (-0x1 .p -6 , NegSmallestDenorm.convertToDouble ());
6671
+
6672
+ APFloat PosInf = APFloat::getInf (APFloat::Float8E3M4 ());
6673
+ EXPECT_EQ (std::numeric_limits<double >::infinity (), PosInf.convertToDouble ());
6674
+ APFloat NegInf = APFloat::getInf (APFloat::Float8E3M4 (), true );
6675
+ EXPECT_EQ (-std::numeric_limits<double >::infinity (), NegInf.convertToDouble ());
6676
+ APFloat QNaN = APFloat::getQNaN (APFloat::Float8E3M4 ());
6677
+ EXPECT_TRUE (std::isnan (QNaN.convertToDouble ()));
6678
+ }
6679
+
6639
6680
TEST (APFloatTest, FloatTF32ToDouble) {
6640
6681
APFloat One (APFloat::FloatTF32 (), " 1.0" );
6641
6682
EXPECT_EQ (1.0 , One.convertToDouble ());
@@ -6944,6 +6985,46 @@ TEST(APFloatTest, Float8E4M3FNToFloat) {
6944
6985
EXPECT_TRUE (std::isnan (QNaN.convertToFloat ()));
6945
6986
}
6946
6987
6988
+ TEST (APFloatTest, Float8E3M4ToFloat) {
6989
+ APFloat PosZero = APFloat::getZero (APFloat::Float8E3M4 (), false );
6990
+ APFloat PosZeroToFloat (PosZero.convertToFloat ());
6991
+ EXPECT_TRUE (PosZeroToFloat.isPosZero ());
6992
+ APFloat NegZero = APFloat::getZero (APFloat::Float8E3M4 (), true );
6993
+ APFloat NegZeroToFloat (NegZero.convertToFloat ());
6994
+ EXPECT_TRUE (NegZeroToFloat.isNegZero ());
6995
+
6996
+ APFloat One (APFloat::Float8E3M4 (), " 1.0" );
6997
+ EXPECT_EQ (1 .0F , One.convertToFloat ());
6998
+ APFloat Two (APFloat::Float8E3M4 (), " 2.0" );
6999
+ EXPECT_EQ (2 .0F , Two.convertToFloat ());
7000
+
7001
+ APFloat PosLargest = APFloat::getLargest (APFloat::Float8E3M4 (), false );
7002
+ EXPECT_EQ (15 .5F , PosLargest.convertToFloat ());
7003
+ APFloat NegLargest = APFloat::getLargest (APFloat::Float8E3M4 (), true );
7004
+ EXPECT_EQ (-15 .5F , NegLargest.convertToFloat ());
7005
+ APFloat PosSmallest =
7006
+ APFloat::getSmallestNormalized (APFloat::Float8E3M4 (), false );
7007
+ EXPECT_EQ (0x1 .p -2 , PosSmallest.convertToFloat ());
7008
+ APFloat NegSmallest =
7009
+ APFloat::getSmallestNormalized (APFloat::Float8E3M4 (), true );
7010
+ EXPECT_EQ (-0x1 .p -2 , NegSmallest.convertToFloat ());
7011
+
7012
+ APFloat PosSmallestDenorm =
7013
+ APFloat::getSmallest (APFloat::Float8E3M4 (), false );
7014
+ EXPECT_TRUE (PosSmallestDenorm.isDenormal ());
7015
+ EXPECT_EQ (0x1 .p -6 , PosSmallestDenorm.convertToFloat ());
7016
+ APFloat NegSmallestDenorm = APFloat::getSmallest (APFloat::Float8E3M4 (), true );
7017
+ EXPECT_TRUE (NegSmallestDenorm.isDenormal ());
7018
+ EXPECT_EQ (-0x1 .p -6 , NegSmallestDenorm.convertToFloat ());
7019
+
7020
+ APFloat PosInf = APFloat::getInf (APFloat::Float8E3M4 ());
7021
+ EXPECT_EQ (std::numeric_limits<float >::infinity (), PosInf.convertToFloat ());
7022
+ APFloat NegInf = APFloat::getInf (APFloat::Float8E3M4 (), true );
7023
+ EXPECT_EQ (-std::numeric_limits<float >::infinity (), NegInf.convertToFloat ());
7024
+ APFloat QNaN = APFloat::getQNaN (APFloat::Float8E3M4 ());
7025
+ EXPECT_TRUE (std::isnan (QNaN.convertToFloat ()));
7026
+ }
7027
+
6947
7028
TEST (APFloatTest, FloatTF32ToFloat) {
6948
7029
APFloat PosZero = APFloat::getZero (APFloat::FloatTF32 ());
6949
7030
APFloat PosZeroToFloat (PosZero.convertToFloat ());
0 commit comments