BUG: FloatingArray(float16data) (#44715)

jbrockmendel · web-flow · commit 5302d1b63cd6 · 2021-12-04T18:51:30.000-05:00
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -812,6 +812,8 @@ ExtensionArray
 - Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`)
 - Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`)
 - Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)
+- Bug in :func:`array` incorrectly raising when passed an ``ndarray`` with ``float16`` dtype (:issue:`44715`)
+- Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`44715`)
 - Bug in :meth:`Series.where` with ``ExtensionDtype`` when ``other`` is a NA scalar incompatible with the series dtype (e.g. ``NaT`` with a numeric dtype) incorrectly casting to a compatible NA value (:issue:`44697`)
 -
 
diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py
@@ -254,6 +254,10 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
                 "values should be floating numpy array. Use "
                 "the 'pd.array' function instead"
             )
+        if values.dtype == np.float16:
+            # If we don't raise here, then accessing self.dtype would raise
+            raise TypeError("FloatingArray does not support np.float16 dtype.")
+
         super().__init__(values, mask, copy=copy)
 
     @classmethod
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -466,6 +466,10 @@ def reconstruct(x):
                 return IntegerArray(x, m)
             elif is_float_dtype(x.dtype):
                 m = mask.copy()
+                if x.dtype == np.float16:
+                    # reached in e.g. np.sqrt on BooleanArray
+                    # we don't support float16
+                    x = x.astype(np.float32)
                 return FloatingArray(x, m)
             else:
                 x[mask] = np.nan
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -353,7 +353,12 @@ def array(
         elif inferred_dtype == "integer":
             return IntegerArray._from_sequence(data, copy=copy)
 
-        elif inferred_dtype in ("floating", "mixed-integer-float"):
+        elif (
+            inferred_dtype in ("floating", "mixed-integer-float")
+            and getattr(data, "dtype", None) != np.float16
+        ):
+            # GH#44715 Exclude np.float16 because FloatingArray does not support it;
+            #  we will fall back to PandasArray.
             return FloatingArray._from_sequence(data, copy=copy)
 
         elif inferred_dtype == "boolean":
diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py
@@ -66,6 +66,17 @@ def test_ufuncs_unary(ufunc):
     tm.assert_series_equal(result, expected)
 
 
+def test_ufunc_numeric():
+    # np.sqrt on np.bool returns float16, which we upcast to Float32
+    #  because we do not have a Float16 dtype
+    arr = pd.array([True, False, None], dtype="boolean")
+
+    res = np.sqrt(arr)
+
+    expected = pd.array([1, 0, None], dtype="Float32")
+    tm.assert_extension_array_equal(res, expected)
+
+
 @pytest.mark.parametrize("values", [[True, False], [True, None]])
 def test_ufunc_reduce_raises(values):
     a = pd.array(values, dtype="boolean")
diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py
@@ -1,6 +1,13 @@
+import locale
+
 import numpy as np
 import pytest
 
+from pandas.compat import (
+    is_platform_windows,
+    np_version_under1p19,
+)
+
 import pandas as pd
 import pandas._testing as tm
 from pandas.core.arrays import FloatingArray
@@ -40,6 +47,33 @@ def test_floating_array_constructor():
         FloatingArray(values)
 
 
+def test_floating_array_disallows_float16(request):
+    # GH#44715
+    arr = np.array([1, 2], dtype=np.float16)
+    mask = np.array([False, False])
+
+    msg = "FloatingArray does not support np.float16 dtype"
+    with pytest.raises(TypeError, match=msg):
+        FloatingArray(arr, mask)
+
+    if not np_version_under1p19:
+        # Troubleshoot
+        #  https://github.com/numpy/numpy/issues/20512#issuecomment-985807740
+        lowered = np.core._type_aliases.english_lower("Float16")
+        assert lowered == "float16", lowered
+
+    if np_version_under1p19 or (
+        locale.getlocale()[0] != "en_US" and not is_platform_windows()
+    ):
+        # the locale condition may need to be refined; this fails on
+        #  the CI in the ZH_CN build
+        mark = pytest.mark.xfail(reason="numpy does not raise on np.dtype('Float16')")
+        request.node.add_marker(mark)
+
+    with pytest.raises(TypeError, match="data type 'Float16' not understood"):
+        pd.array([1.0, 2.0], dtype="Float16")
+
+
 def test_floating_array_constructor_copy():
     values = np.array([1, 2, 3, 4], dtype="float64")
     mask = np.array([False, False, False, True], dtype="bool")
diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py
@@ -51,6 +51,12 @@
         # String alias passes through to NumPy
         ([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))),
         ([1, 2], "int64", PandasArray(np.array([1, 2], dtype=np.int64))),
+        # GH#44715 FloatingArray does not support float16, so fall back to PandasArray
+        (
+            np.array([1, 2], dtype=np.float16),
+            None,
+            PandasArray(np.array([1, 2], dtype=np.float16)),
+        ),
         # idempotency with e.g. pd.array(pd.array([1, 2], dtype="int64"))
         (
             PandasArray(np.array([1, 2], dtype=np.int32)),

Original file line number	Diff line number	Diff line change
`@@ -812,6 +812,8 @@ ExtensionArray`
`812`	`812`	- Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`)
`813`	`813`	- Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`)
`814`	`814`	- Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)
	`815`	+- Bug in :func:`array` incorrectly raising when passed an ``ndarray`` with ``float16`` dtype (:issue:`44715`)
	`816`	+- Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`44715`)
`815`	`817`	- Bug in :meth:`Series.where` with ``ExtensionDtype`` when ``other`` is a NA scalar incompatible with the series dtype (e.g. ``NaT`` with a numeric dtype) incorrectly casting to a compatible NA value (:issue:`44697`)
`816`	`818`	`-`
`817`	`819`