Add F4E2M1FN type: python interface

sergey-kozub · sergey-kozub · commit daaa3af3ce3a · 2024-12-18T13:09:21.000Z
diff --git a/xla/pjrt/c/pjrt_c_api.h b/xla/pjrt/c/pjrt_c_api.h
@@ -649,6 +649,9 @@ typedef enum {
   // More truncated 8 bit floating-point formats.
   PJRT_Buffer_Type_F8E4M3,
   PJRT_Buffer_Type_F8E3M4,
+
+  // 4-bit MX floating-point format.
+  PJRT_Buffer_Type_F4E2M1FN,
 } PJRT_Buffer_Type;
 
 typedef enum {
diff --git a/xla/pjrt/c/pjrt_c_api_helpers.cc b/xla/pjrt/c/pjrt_c_api_helpers.cc
@@ -294,6 +294,8 @@ PJRT_Buffer_Type ConvertToPjRtBufferType(xla::PrimitiveType type) {
       return PJRT_Buffer_Type::PJRT_Buffer_Type_BF16;
     case xla::PrimitiveType::F64:
       return PJRT_Buffer_Type::PJRT_Buffer_Type_F64;
+    case xla::PrimitiveType::F4E2M1FN:
+      return PJRT_Buffer_Type::PJRT_Buffer_Type_F4E2M1FN;
     case xla::PrimitiveType::F8E5M2:
       return PJRT_Buffer_Type::PJRT_Buffer_Type_F8E5M2;
     case xla::PrimitiveType::F8E4M3:
@@ -361,6 +363,8 @@ xla::PrimitiveType ConvertFromPjRtBufferType(PJRT_Buffer_Type type) {
       return xla::PrimitiveType::C64;
     case PJRT_Buffer_Type::PJRT_Buffer_Type_C128:
       return xla::PrimitiveType::C128;
+    case PJRT_Buffer_Type::PJRT_Buffer_Type_F4E2M1FN:
+      return xla::PrimitiveType::F4E2M1FN;
     case PJRT_Buffer_Type::PJRT_Buffer_Type_F8E5M2:
       return xla::PrimitiveType::F8E5M2;
     case PJRT_Buffer_Type::PJRT_Buffer_Type_F8E4M3:
diff --git a/xla/python/ifrt/dtype.cc b/xla/python/ifrt/dtype.cc
@@ -32,6 +32,7 @@ std::optional<int> DType::byte_size() const {
     case kU2:
     case kS4:
     case kU4:
+    case kF4E2M1FN:
       // Smaller than a byte.
       return std::nullopt;
     case kPred:
@@ -77,6 +78,7 @@ std::optional<int> DType::bit_size() const {
       return 2;
     case kS4:
     case kU4:
+    case kF4E2M1FN:
       return 4;
     case kPred:
     case kS8:
@@ -142,6 +144,7 @@ absl::StatusOr<DType> DType::FromProto(const DTypeProto& dtype_proto) {
       CASE(C64);
       CASE(C128);
       // TODO: Uncomment once the minimum ml_dtypes in JAX is >= 0.5.0.
+      // CASE(F4E2M1FN);
       // CASE(F8E3M4);
       // CASE(F8E4M3);
       CASE(F8E4M3FN);
@@ -190,6 +193,7 @@ DTypeProto DType::ToProto() const {
       CASE(C64);
       CASE(C128);
       // TODO: Uncomment once the minimum ml_dtypes in JAX is >= 0.5.0.
+      // CASE(F4E2M1FN);
       // CASE(F8E3M4);
       // CASE(F8E4M3);
       CASE(F8E4M3FN);
diff --git a/xla/python/ifrt/dtype.h b/xla/python/ifrt/dtype.h
@@ -89,7 +89,10 @@ class DType {
     kF8E5M2 = 19,
     kF8E5M2FNUZ = 24,
 
-    // Next = 30
+    // MX floating-point types.
+    kF4E2M1FN = 30,
+
+    // Next = 31
 
     // Variable-length string represented as raw bytes, as in `bytes` in Python,
     // i.e., no encoding enforcement. String is not support in XLA. DType.Kind
diff --git a/xla/python/ifrt/dtype.proto b/xla/python/ifrt/dtype.proto
@@ -71,11 +71,16 @@ message DTypeProto {
     KIND_F8E5M2 = 19;
     KIND_F8E5M2FNUZ = 24;
 
+    // MX floating-point types.
+    KIND_F4E2M1FN = 30;
+
     // Variable-length string represented as raw bytes, as in `bytes` in Python,
     // i.e., no encoding enforcement. String is not support in XLA. DType.Kind
     // needs to match xla.PrimitiveType enum, so choose a large enum to avoid
     // collision.
     KIND_STRING = 99;
+
+    // Next: 31
   }
   // LINT.ThenChange()
   Kind kind = 1;
diff --git a/xla/python/ifrt/dtype_test.cc b/xla/python/ifrt/dtype_test.cc
@@ -42,35 +42,21 @@ TEST(DTypeTest, FromToFromProto) {
 TEST(DTypeTest, ByteSize) {
   for (const auto& [kind, byte_size] :
        std::vector<std::tuple<DType::Kind, int>>({
-           {DType::kS2, -1},
-           {DType::kU2, -1},
-           {DType::kS4, -1},
-           {DType::kU4, -1},
-           {DType::kPred, 1},
-           {DType::kS8, 1},
-           {DType::kU8, 1},
-           {DType::kF8E3M4, 1},
-           {DType::kF8E4M3, 1},
-           {DType::kF8E4M3FN, 1},
-           {DType::kF8E4M3B11FNUZ, 1},
-           {DType::kF8E4M3FNUZ, 1},
-           {DType::kF8E5M2, 1},
-           {DType::kF8E5M2FNUZ, 1},
-           {DType::kS16, 2},
-           {DType::kU16, 2},
-           {DType::kF16, 2},
-           {DType::kBF16, 2},
-           {DType::kS32, 4},
-           {DType::kU32, 4},
-           {DType::kF32, 4},
-           {DType::kS64, 8},
-           {DType::kU64, 8},
-           {DType::kF64, 8},
-           {DType::kC64, 8},
-           {DType::kC128, 16},
-           {DType::kToken, -1},
-           {DType::kInvalid, -1},
-           {DType::kString, -1},
+           {DType::kS2, -1},        {DType::kU2, -1},
+           {DType::kS4, -1},        {DType::kU4, -1},
+           {DType::kPred, 1},       {DType::kS8, 1},
+           {DType::kU8, 1},         {DType::kF4E2M1FN, -1},
+           {DType::kF8E3M4, 1},     {DType::kF8E4M3, 1},
+           {DType::kF8E4M3FN, 1},   {DType::kF8E4M3B11FNUZ, 1},
+           {DType::kF8E4M3FNUZ, 1}, {DType::kF8E5M2, 1},
+           {DType::kF8E5M2FNUZ, 1}, {DType::kS16, 2},
+           {DType::kU16, 2},        {DType::kF16, 2},
+           {DType::kBF16, 2},       {DType::kS32, 4},
+           {DType::kU32, 4},        {DType::kF32, 4},
+           {DType::kS64, 8},        {DType::kU64, 8},
+           {DType::kF64, 8},        {DType::kC64, 8},
+           {DType::kC128, 16},      {DType::kToken, -1},
+           {DType::kInvalid, -1},   {DType::kString, -1},
        })) {
     EXPECT_EQ(DType(kind).byte_size(),
               byte_size == -1 ? std::nullopt : std::make_optional(byte_size));
@@ -87,6 +73,7 @@ TEST(DTypeTest, BitSize) {
            {DType::kPred, 8},
            {DType::kS8, 8},
            {DType::kU8, 8},
+           {DType::kF4E2M1FN, 4},
            {DType::kF8E3M4, 8},
            {DType::kF8E4M3, 8},
            {DType::kF8E4M3FN, 8},
diff --git a/xla/python/pjrt_ifrt/pjrt_dtype.cc b/xla/python/pjrt_ifrt/pjrt_dtype.cc
@@ -44,6 +44,7 @@ absl::StatusOr<xla::PrimitiveType> ToPrimitiveType(DType dtype) {
     CASE(DType::kU16, xla::PrimitiveType::U16);
     CASE(DType::kU32, xla::PrimitiveType::U32);
     CASE(DType::kU64, xla::PrimitiveType::U64);
+    CASE(DType::kF4E2M1FN, xla::PrimitiveType::F4E2M1FN);
     CASE(DType::kF8E3M4, xla::PrimitiveType::F8E3M4);
     CASE(DType::kF8E4M3, xla::PrimitiveType::F8E4M3);
     CASE(DType::kF8E4M3FN, xla::PrimitiveType::F8E4M3FN);
@@ -83,6 +84,7 @@ absl::StatusOr<DType> ToDType(xla::PrimitiveType primitive_type) {
     case xla::PrimitiveType::U16:
     case xla::PrimitiveType::U32:
     case xla::PrimitiveType::U64:
+    case xla::PrimitiveType::F4E2M1FN:
     case xla::PrimitiveType::F8E3M4:
     case xla::PrimitiveType::F8E4M3:
     case xla::PrimitiveType::F8E4M3FN:
diff --git a/xla/python/py_values.cc b/xla/python/py_values.cc
@@ -184,6 +184,9 @@ absl::StatusOr<DevicePutResultFn> HandleNumpyScalar(
   } else if (std::is_same<T, bfloat16>()) {
     PyArray_ScalarAsCtype(h.ptr(), &data.template emplace<2>());
     type = BF16;
+  } else if (std::is_same<T, tsl::float4_e2m1fn>()) {
+    PyArray_ScalarAsCtype(h.ptr(), &data.template emplace<2>());
+    type = F4E2M1FN;
   } else if (std::is_same<T, tsl::float8_e3m4>()) {
     PyArray_ScalarAsCtype(h.ptr(), &data.template emplace<2>());
     type = F8E3M4;
@@ -398,6 +401,10 @@ absl::StatusOr<DevicePutResultFn> DevicePut(nb::handle arg,
         (*p)[dtypes.np_uint16.ptr()] = HandleNumpyScalar<uint16_t>;
         (*p)[dtypes.np_uint32.ptr()] = HandleNumpyScalar<uint32_t>;
         (*p)[dtypes.np_uint64.ptr()] = HandleNumpyScalar<uint64_t, uint32_t>;
+        if (dtypes.np_float4_e2m1fn.has_value()) {
+          (*p)[dtypes.np_float4_e2m1fn->ptr()] =
+              HandleNumpyScalar<tsl::float4_e2m1fn>;
+        }
         if (dtypes.np_float8_e3m4.has_value()) {
           (*p)[dtypes.np_float8_e3m4->ptr()] =
               HandleNumpyScalar<tsl::float8_e3m4>;
@@ -595,6 +602,7 @@ absl::StatusOr<PyArgSignature> PyArgSignatureOfValue(nb::handle arg,
         (*p)[dtypes.np_uint32.ptr()] = numpy_array_handler;
         (*p)[dtypes.np_uint64.ptr()] = np_uint64_handler;
         // TODO: Uncomment once the minimum ml_dtypes in JAX is >= 0.5.0.
+        // (*p)[dtypes.np_float4_e2m1fn.ptr()] = numpy_array_handler;
         // (*p)[dtypes.np_float8_e3m4.ptr()] = numpy_array_handler;
         // (*p)[dtypes.np_float8_e4m3.ptr()] = numpy_array_handler;
         (*p)[dtypes.np_float8_e4m3fn.ptr()] = numpy_array_handler;
diff --git a/xla/python/types.cc b/xla/python/types.cc
@@ -58,6 +58,7 @@ namespace {
 
 struct CustomDtypes {
   nb_dtype bfloat16;
+  std::optional<nb_dtype> float4_e2m1fn;
   std::optional<nb_dtype> float8_e3m4;
   std::optional<nb_dtype> float8_e4m3;
   nb_dtype float8_e4m3fn;
@@ -76,6 +77,10 @@ const CustomDtypes& GetCustomDtypes() {
     nb::module_ ml_dtypes = nb::module_::import_("ml_dtypes");
     auto* dtypes = new CustomDtypes;
     dtypes->bfloat16 = nb_dtype::from_args(ml_dtypes.attr("bfloat16"));
+    if (nb::hasattr(ml_dtypes, "float4_e2m1fn")) {
+      dtypes->float4_e2m1fn =
+          nb_dtype::from_args(ml_dtypes.attr("float4_e2m1fn"));
+    }
     if (nb::hasattr(ml_dtypes, "float8_e3m4")) {
       dtypes->float8_e3m4 = nb_dtype::from_args(ml_dtypes.attr("float8_e3m4"));
     }
@@ -147,6 +152,9 @@ absl::StatusOr<PrimitiveType> DtypeToPrimitiveType(const nb_dtype& np_type) {
     auto* map =
         new absl::flat_hash_map<nb_dtype, PrimitiveType, DtypeHash, DtypeEq>();
     map->emplace(custom_dtypes.bfloat16, BF16);
+    if (custom_dtypes.float4_e2m1fn.has_value()) {
+      map->emplace(*custom_dtypes.float4_e2m1fn, F4E2M1FN);
+    }
     if (custom_dtypes.float8_e3m4.has_value()) {
       map->emplace(*custom_dtypes.float8_e3m4, F8E3M4);
     }
@@ -217,6 +225,11 @@ absl::StatusOr<nb_dtype> PrimitiveTypeToNbDtype(PrimitiveType type) {
       return to_nb_dtype(NPY_UINT32);
     case U64:
       return to_nb_dtype(NPY_UINT64);
+    case F4E2M1FN:
+      if (custom_dtypes.float4_e2m1fn.has_value()) {
+        return *custom_dtypes.float4_e2m1fn;
+      }
+      break;
     case F8E3M4:
       if (custom_dtypes.float8_e3m4.has_value()) {
         return *custom_dtypes.float8_e3m4;
@@ -307,6 +320,11 @@ absl::StatusOr<nb_dtype> IfrtDtypeToNbDtype(ifrt::DType dtype) {
       return to_nb_dtype(NPY_COMPLEX64);
     case ifrt::DType::kC128:
       return to_nb_dtype(NPY_COMPLEX128);
+    case ifrt::DType::kF4E2M1FN:
+      if (custom_dtypes.float4_e2m1fn.has_value()) {
+        return *custom_dtypes.float4_e2m1fn;
+      }
+      break;
     case ifrt::DType::kF8E3M4:
       if (custom_dtypes.float8_e3m4.has_value()) {
         return *custom_dtypes.float8_e3m4;
@@ -380,6 +398,9 @@ const NumpyScalarTypes& GetNumpyScalarTypes() {
     dtypes->np_uint32 = nb::object(numpy.attr("uint32"));
     dtypes->np_uint64 = nb::object(numpy.attr("uint64"));
     dtypes->np_bfloat16 = nb::object(ml_dtypes.attr("bfloat16"));
+    if (nb::hasattr(ml_dtypes, "float4_e2m1fn")) {
+      dtypes->np_float4_e2m1fn = nb::object(ml_dtypes.attr("float4_e2m1fn"));
+    }
     if (nb::hasattr(ml_dtypes, "float8_e3m4")) {
       dtypes->np_float8_e3m4 = nb::object(ml_dtypes.attr("float8_e3m4"));
     }
diff --git a/xla/python/types.h b/xla/python/types.h
@@ -81,6 +81,7 @@ struct NumpyScalarTypes {
   nanobind::object np_uint64;
   nanobind::object np_bfloat16;
   // Remove std::optional once the minimum ml_dtypes in JAX is >= 0.5.0.
+  std::optional<nanobind::object> np_float4_e2m1fn;
   std::optional<nanobind::object> np_float8_e3m4;
   std::optional<nanobind::object> np_float8_e4m3;
   nanobind::object np_float8_e4m3fn;
diff --git a/xla/python/xla.cc b/xla/python/xla.cc
@@ -205,6 +205,7 @@ NB_MODULE(xla_extension, m) {
       .value("U64", U64)
       .value("F16", F16)
       // TODO: Uncomment once the minimum ml_dtypes in JAX is >= 0.5.0.
+      // .value("F4E2M1FN", F4E2M1FN)
       // .value("F8E3M4", F8E3M4)
       // .value("F8E4M3", F8E4M3)
       .value("F8E4M3FN", F8E4M3FN)
diff --git a/xla/python/xla_client.py b/xla/python/xla_client.py
@@ -280,6 +280,7 @@ def CurrentSourceInfoMetadata(op_type=None, op_name=None, skip_frames=1):
 
 bfloat16 = ml_dtypes.bfloat16
 # TODO(reedwm): Uncomment once the minimum ml_dtypes in JAX is >= 0.5.0.
+# float4_e2m1fn = ml_dtypes.float4_e2m1fn
 # float8_e3m4 = ml_dtypes.float8_e3m4
 # float8_e4m3 = ml_dtypes.float8_e4m3
 float8_e4m3fn = ml_dtypes.float8_e4m3fn
@@ -301,6 +302,7 @@ def CurrentSourceInfoMetadata(op_type=None, op_name=None, skip_frames=1):
     PrimitiveType.U32: np.dtype('uint32'),
     PrimitiveType.U64: np.dtype('uint64'),
     # TODO(reedwm): Uncomment once the minimum ml_dtypes in JAX is >= 0.5.0.
+    # PrimitiveType.F4E2M1FN: np.dtype(float4_e2m1fn),
     # PrimitiveType.F8E3M4: np.dtype(float8_e3m4),
     # PrimitiveType.F8E4M3: np.dtype(float8_e4m3),
     PrimitiveType.F8E4M3FN: np.dtype(float8_e4m3fn),
diff --git a/xla/python/xla_client.pyi b/xla/python/xla_client.pyi
@@ -62,6 +62,7 @@ mlir_api_version: int
 
 bfloat16: type[numpy.generic]
 # TODO: Uncomment once the minimum ml_dtypes in JAX is >= 0.5.0.
+# float4_e2m1fn: type[numpy.generic]
 # float8_e3m4: type[numpy.generic]
 # float8_e4m3: type[numpy.generic]
 float8_e4m3fn: type[numpy.generic]
diff --git a/xla/python/xla_client_test.py b/xla/python/xla_client_test.py
@@ -55,6 +55,7 @@
 
 bfloat16 = xla_client.bfloat16
 # TODO(reedwm): Uncomment once the minimum ml_dtypes in JAX is >= 0.5.0.
+# float4_e2m1fn = xla_client.float4_e2m1fn
 # float8_e3m4 = xla_client.float8_e3m4
 # float8_e4m3 = xla_client.float8_e4m3
 float8_e4m3fn = xla_client.float8_e4m3fn
@@ -189,7 +190,7 @@ def TestFactory(xla_backend,
   fp8_dtypes = [float8_e4m3b11fnuz, float8_e4m3fn, float8_e5m2]
   standard_dtypes += fp8_dtypes
   # TODO(reedwm): Uncomment once the minimum ml_dtypes in JAX is >= 0.5.0.
-  # standard_dtypes += [float8_e3m4, float8_e4m3]
+  # standard_dtypes += [float4_e2m1fn, float8_e3m4, float8_e4m3]
   dlpack_dtypes = int_dtypes + float_dtypes + [np.bool_] + complex_dtypes
 
   class ComputationTest(parameterized.TestCase):
diff --git a/xla/python/xla_extension/__init__.pyi b/xla/python/xla_extension/__init__.pyi
@@ -74,6 +74,7 @@ class PrimitiveType(enum.IntEnum):
   U16: PrimitiveType
   U32: PrimitiveType
   U64: PrimitiveType
+  F4E2M1FN: PrimitiveType
   F8E3M4: PrimitiveType
   F8E4M3: PrimitiveType
   F8E4M3FN: PrimitiveType
diff --git a/xla/tsl/python/lib/core/ml_dtypes.cc b/xla/tsl/python/lib/core/ml_dtypes.cc
@@ -61,6 +61,8 @@ struct MlDtypesInitInfo {
 
       numpy_dtypes.bfloat16 =
           py::dtype::from_args(ml_dtypes.attr("bfloat16")).num();
+      numpy_dtypes.float4_e2m1fn =
+          py::dtype::from_args(ml_dtypes.attr("float4_e2m1fn")).num();
       numpy_dtypes.float8_e3m4 =
           py::dtype::from_args(ml_dtypes.attr("float8_e3m4")).num();
       numpy_dtypes.float8_e4m3 =
@@ -85,6 +87,7 @@ struct MlDtypesInitInfo {
 
     // Verify all types were successfully loaded.
     if (numpy_dtypes.bfloat16 == NPY_NOTYPE ||
+        numpy_dtypes.float4_e2m1fn == NPY_NOTYPE ||
         numpy_dtypes.float8_e3m4 == NPY_NOTYPE ||
         numpy_dtypes.float8_e4m3 == NPY_NOTYPE ||
         numpy_dtypes.float8_e4m3fn == NPY_NOTYPE ||
diff --git a/xla/tsl/python/lib/core/ml_dtypes.h b/xla/tsl/python/lib/core/ml_dtypes.h
@@ -24,6 +24,7 @@ namespace ml_dtypes {
 
 struct NumpyDtypes {
   int bfloat16;
+  int float4_e2m1fn;
   int float8_e3m4;
   int float8_e4m3;
   int float8_e4m3fn;