
Commit 55fa0ee

sergey-kozub authored and Google-ML-Automation committed
PR #19096: Add F4E2M1FN and F8E8M0FNU types
Imported from GitHub PR #19096

This PR adds the F4E2M1FN primitive type (4-bit float with 2 exponent bits and 1 mantissa bit) and the F8E8M0FNU primitive type (8-bit float with 8 exponent bits, no mantissa and no sign), and enables loads/stores in the same way the S4/U4 types are implemented. This will enable using microscaling (MX) formats ([RFC](#18085)), such as MXFP4.

```
F4E2M1FN
- Exponent bias: 1
- Maximum stored exponent value: 3 (binary 11)
- Maximum unbiased exponent value: 3 - 1 = 2
- Minimum stored exponent value: 1 (binary 01)
- Minimum unbiased exponent value: 1 - 1 = 0
- Has positive and negative zero
- Doesn't have infinity
- Doesn't have NaNs

Additional details:
- Zeros (+/-): S.00.0
- Max normal number: S.11.1 = ±2^(2) x (1 + 0.5) = ±6.0
- Min normal number: S.01.0 = ±2^(0) = ±1.0
- Min subnormal number: S.00.1 = ±2^(0) x 0.5 = ±0.5

F8E8M0FNU
- Exponent bias: 127
- Maximum stored exponent value: 254 (binary 1111'1110)
- Maximum unbiased exponent value: 254 - 127 = 127
- Minimum stored exponent value: 0 (binary 0000'0000)
- Minimum unbiased exponent value: 0 - 127 = -127
- Doesn't have zero
- Doesn't have infinity
- NaN is encoded as binary 1111'1111

Additional details:
- Zeros cannot be represented
- Negative values cannot be represented
- Mantissa is always 1
```

Related PRs:
- openxla/stablehlo#2582
- jax-ml/ml_dtypes#181
- llvm/llvm-project#95392
- llvm/llvm-project#108877
- jax-ml/ml_dtypes#166
- llvm/llvm-project#107127
- llvm/llvm-project#111028

The PR is split into multiple commits just to make the review easier; it is possible that some tests could fail if only some (i.e. not all) of these commits are applied.

Copybara import of the project:

- f493e48 by Sergey Kozub <[email protected]>: Add F4E2M1FN type: import mxfloat.h
- 87d0056 by Sergey Kozub <[email protected]>: Add F4E2M1FN type: primitive type
- 70ca820 by Sergey Kozub <[email protected]>: Add F4E2M1FN type: literal support
- c479f09 by Sergey Kozub <[email protected]>: Add F4E2M1FN type: conversion codegen
- daaa3af by Sergey Kozub <[email protected]>: Add F4E2M1FN type: python interface
- 1f0e19f by Sergey Kozub <[email protected]>: Add F4E2M1FN type: FFI
- 999bf96 by Sergey Kozub <[email protected]>: Add F4E2M1FN type: HLO evaluator
- d7d5af7 by Sergey Kozub <[email protected]>: Add F4E2M1FN type: add tests
- 9e8c7bc by Sergey Kozub <[email protected]>: Add F8E8M0FNU type
- 1e34417 by Sergey Kozub <[email protected]>: Addressing PR#19096 review comments
- d4de0a3 by Sergey Kozub <[email protected]>: Addressing PR#19096 review comments (round 2)

Merging this change closes #19096

FUTURE_COPYBARA_INTEGRATE_REVIEW=#19096 from openxla:skozub/e2m1 d4de0a3
PiperOrigin-RevId: 707638099
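For a concrete reading of the F4E2M1FN layout described above (1 sign bit, 2 exponent bits, 1 mantissa bit, exponent bias 1), here is a small standalone decoder sketch. It is not part of the PR (the actual implementation lives in ml_dtypes' mxfloat.h); it only illustrates how the documented max/min values fall out of the encoding.

```cpp
// Illustrative sketch only, not the ml_dtypes implementation.
#include <cstdint>
#include <cstdio>

// Decodes a 4-bit F4E2M1FN value: S.EE.M with exponent bias 1.
float DecodeF4E2M1FN(uint8_t nibble) {
  int sign = (nibble >> 3) & 0x1;      // S
  int exponent = (nibble >> 1) & 0x3;  // EE (stored, biased by 1)
  int mantissa = nibble & 0x1;         // M
  float magnitude;
  if (exponent == 0) {
    // Subnormal range: +/-0.0 (M=0) or +/-0.5 (M=1).
    magnitude = 0.5f * mantissa;
  } else {
    // Normal range: 2^(E-1) * (1 + M/2).
    magnitude =
        static_cast<float>(1 << (exponent - 1)) * (1.0f + 0.5f * mantissa);
  }
  return sign ? -magnitude : magnitude;
}

int main() {
  std::printf("%g\n", DecodeF4E2M1FN(0b0111));  // max normal: 6
  std::printf("%g\n", DecodeF4E2M1FN(0b0010));  // min normal: 1
  std::printf("%g\n", DecodeF4E2M1FN(0b0001));  // min subnormal: 0.5
  std::printf("%g\n", DecodeF4E2M1FN(0b1000));  // negative zero: -0
}
```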
1 parent edcdfcf commit 55fa0ee

83 files changed, +1856 −367 lines


third_party/tsl/tsl/platform/BUILD

Lines changed: 1 addition & 0 deletions
```diff
@@ -985,6 +985,7 @@ cc_library(
     deps = [
         "@ml_dtypes//:float8",
         "@ml_dtypes//:intn",
+        "@ml_dtypes//:mxfloat",
     ],
 )
```

third_party/tsl/tsl/platform/ml_dtypes.h

Lines changed: 3 additions & 0 deletions
```diff
@@ -18,15 +18,18 @@ limitations under the License.

 #include "ml_dtypes/include/float8.h"  // from @ml_dtypes
 #include "ml_dtypes/include/intn.h"  // from @ml_dtypes
+#include "ml_dtypes/include/mxfloat.h"  // from @ml_dtypes

 namespace tsl {
+using float4_e2m1fn = ::ml_dtypes::float4_e2m1fn;
 using float8_e3m4 = ::ml_dtypes::float8_e3m4;
 using float8_e4m3 = ::ml_dtypes::float8_e4m3;
 using float8_e4m3fn = ::ml_dtypes::float8_e4m3fn;
 using float8_e4m3fnuz = ::ml_dtypes::float8_e4m3fnuz;
 using float8_e4m3b11fnuz = ::ml_dtypes::float8_e4m3b11fnuz;
 using float8_e5m2 = ::ml_dtypes::float8_e5m2;
 using float8_e5m2fnuz = ::ml_dtypes::float8_e5m2fnuz;
+using float8_e8m0fnu = ::ml_dtypes::float8_e8m0fnu;

 using int1 = ::ml_dtypes::int1;
 using uint1 = ::ml_dtypes::uint1;
```
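The new aliases are meant to be used just like the existing float8 aliases. A minimal usage sketch follows; it assumes ml_dtypes specializes std::numeric_limits for the new types the way it does for the float8 family, and that the header is reachable as "tsl/platform/ml_dtypes.h" (both are assumptions, not confirmed by this diff).

```cpp
// Sketch only; numeric_limits support and the include path are assumed.
#include <cstdio>
#include <limits>

#include "tsl/platform/ml_dtypes.h"

int main() {
  using F4 = tsl::float4_e2m1fn;
  std::printf("max=%g min=%g denorm_min=%g\n",
              static_cast<float>(std::numeric_limits<F4>::max()),          // 6
              static_cast<float>(std::numeric_limits<F4>::min()),          // 1
              static_cast<float>(std::numeric_limits<F4>::denorm_min()));  // 0.5
}
```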

xla/array2d_test.cc

Lines changed: 28 additions & 0 deletions
```diff
@@ -219,6 +219,34 @@ TEST(Array2dTest, LinspaceF8E3M4) {
   EXPECT_FLOAT_EQ(static_cast<float>((*arr)(2, 1)), 3.5);
 }

+TEST(Array2dTest, LinspaceF4E2M1FN) {
+  auto arr = MakeLinspaceArray2D<tsl::float4_e2m1fn>(1.0, 3.5, 3, 2);
+
+  EXPECT_EQ(arr->n1(), 3);
+  EXPECT_EQ(arr->n2(), 2);
+
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(0, 0)), 1.0);
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(0, 1)), 1.5);
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(1, 0)), 2.0);
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(1, 1)), 2.0);  // 2.5 rounded down
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(2, 0)), 3.0);
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(2, 1)), 4.0);  // 3.5 rounded up
+}
+
+TEST(Array2dTest, LinspaceF8E8M0FNU) {
+  auto arr = MakeLinspaceArray2D<tsl::float8_e8m0fnu>(1.0, 3.5, 3, 2);
+
+  EXPECT_EQ(arr->n1(), 3);
+  EXPECT_EQ(arr->n2(), 2);
+
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(0, 0)), 1.0);
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(0, 1)), 2.0);  // 1.5 rounded up
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(1, 0)), 2.0);
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(1, 1)), 2.0);  // 2.5 rounded down
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(2, 0)), 4.0);  // 3.0 rounded up
+  EXPECT_FLOAT_EQ(static_cast<float>((*arr)(2, 1)), 4.0);  // 3.5 rounded up
+}
+
 TEST(Array2dTest, Stringification) {
   auto arr = MakeLinspaceArray2D(1.0, 3.5, 3, 2);
   const std::string expected = R"([[1, 1.5],
```
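The expectations above encode the rounding behavior of the float conversions: values are rounded to the nearest representable encoding, so for E2M1 the tie at 2.5 resolves to 2.0 and 3.5 resolves to 4.0, while E8M0 (powers of two only, no sign) maps 1.5, 3.0, and 3.5 up to the next power of two. A minimal sketch of that behavior, assuming the tsl aliases above and that the new types convert to and from float via static_cast as the existing float8 types do:

```cpp
// Sketch only; the include path and static_cast conversions are assumptions.
#include <cstdio>

#include "tsl/platform/ml_dtypes.h"

int main() {
  auto a = static_cast<tsl::float4_e2m1fn>(2.5f);   // tie between 2 and 3 -> 2
  auto b = static_cast<tsl::float4_e2m1fn>(3.5f);   // tie between 3 and 4 -> 4
  auto c = static_cast<tsl::float8_e8m0fnu>(3.0f);  // powers of two only -> 4
  std::printf("%g %g %g\n", static_cast<float>(a), static_cast<float>(b),
              static_cast<float>(c));  // expected (per the tests): 2 4 4
}
```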
