Skip to content

ENH: make is_list_like handle non iterable numpy-like arrays correctly #35127

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
4 changes: 2 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1045,10 +1045,10 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool:
cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1:
    # C-level predicate: is ``obj`` list-like?
    #
    # obj        : any Python object
    # allow_sets : when False, objects that are instances of ``abc.Set`` are
    #              rejected
    #
    # Evaluates to true only for iterables that are not zero-dimensional
    # array-likes, not ``str``/``bytes`` and (optionally) not sets.
    return (
        isinstance(obj, abc.Iterable)
        # avoid numpy-style scalars: anything exposing ``ndim == 0`` is
        # rejected, whether it is a real ndarray or a duck-typed array. This
        # single check also covers zero-dimensional ndarrays, so no separate
        # ndarray-only test is needed.
        and not (hasattr(obj, "ndim") and obj.ndim == 0)
        # we do not count strings/unicode/bytes as list-like
        and not isinstance(obj, (str, bytes))
        # exclude sets if allow_sets is False
        and not (allow_sets is False and isinstance(obj, abc.Set))
    )
Expand Down
5 changes: 4 additions & 1 deletion pandas/_libs/testing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ from pandas._libs.lib import is_complex
from pandas._libs.util cimport is_array, is_real_number_object

from pandas.core.dtypes.common import is_dtype_equal
from pandas.core.dtypes.inference import is_array_like
from pandas.core.dtypes.missing import array_equivalent, isna


Expand Down Expand Up @@ -99,7 +100,9 @@ cpdef assert_almost_equal(a, b,
return True

a_is_ndarray = is_array(a)
a_has_size_and_shape = hasattr(a, "size") and hasattr(a, "shape")
b_is_ndarray = is_array(b)
b_has_size_and_shape = hasattr(b, "size") and hasattr(b, "shape")

if obj is None:
if a_is_ndarray or b_is_ndarray:
Expand All @@ -119,7 +122,7 @@ cpdef assert_almost_equal(a, b,
f"Can't compare objects without length, one or both is invalid: ({a}, {b})"
)

if a_is_ndarray and b_is_ndarray:
if (a_is_ndarray and b_is_ndarray) or (a_has_size_and_shape and b_has_size_and_shape):
na, nb = a.size, b.size
if a.shape != b.shape:
from pandas._testing import raise_assert_detail
Expand Down
146 changes: 119 additions & 27 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,52 @@ def coerce(request):
return request.param


class MockNumpyLikeArray:
    """
    A class which is numpy-like (e.g. Pint's Quantity) but not actually numpy

    The key is that it is not actually a numpy array so
    ``util.is_array(mock_numpy_like_array_instance)`` returns ``False``. Other
    important properties are that the class defines a :meth:`__iter__` method
    (so that ``isinstance(abc.Iterable)`` returns ``True``) and has a
    :attr:`ndim` property which can be used as a check for whether it is a
    scalar or not.

    Parameters
    ----------
    values : object
        The wrapped value. ``ndim``, ``dtype``, ``size`` and ``shape`` are
        forwarded to it unchanged, so it must provide those attributes.
    """

    def __init__(self, values):
        self._values = values

    def __iter__(self):
        # ``iter`` is called eagerly (outside the generator) so that a wrapped
        # value which cannot be iterated raises as soon as ``iter(obj)`` is
        # called instead of on the first ``next``.
        iter_values = iter(self._values)

        def it_outer():
            yield from iter_values

        return it_outer()

    def __len__(self):
        return len(self._values)

    def __array__(self, t=None, copy=None):
        """
        Return the wrapped values as an ndarray.

        Parameters
        ----------
        t : dtype, optional
            Requested dtype; ``None`` keeps the dtype of the wrapped values.
        copy : bool, optional
            Accepted because NumPy 2 passes it to ``__array__``. A true value
            forces a fresh array; ``None``/``False`` return the wrapped array
            itself when no dtype conversion is required.
        """
        arr = np.asarray(self._values, dtype=t)
        if copy:
            arr = np.array(arr, copy=True)
        return arr

    @property
    def ndim(self):
        return self._values.ndim

    @property
    def dtype(self):
        return self._values.dtype

    @property
    def size(self):
        return self._values.size

    @property
    def shape(self):
        return self._values.shape


# collect all objects to be tested for list-like-ness; use tuples of objects,
# whether they are list-like or not (special casing for sets), and their ID
ll_params = [
Expand Down Expand Up @@ -94,6 +140,15 @@ def coerce(request):
(np.ndarray((2,) * 4), True, "ndarray-4d"),
(np.array([[[[]]]]), True, "ndarray-4d-empty"),
(np.array(2), False, "ndarray-0d"),
(MockNumpyLikeArray(np.ndarray((2,) * 1)), True, "duck-ndarray-1d"),
(MockNumpyLikeArray(np.array([])), True, "duck-ndarray-1d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 2)), True, "duck-ndarray-2d"),
(MockNumpyLikeArray(np.array([[]])), True, "duck-ndarray-2d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 3)), True, "duck-ndarray-3d"),
(MockNumpyLikeArray(np.array([[[]]])), True, "duck-ndarray-3d-empty"),
(MockNumpyLikeArray(np.ndarray((2,) * 4)), True, "duck-ndarray-4d"),
(MockNumpyLikeArray(np.array([[[[]]]])), True, "duck-ndarray-4d-empty"),
(MockNumpyLikeArray(np.array(2)), False, "duck-ndarray-0d"),
(1, False, "int"),
(b"123", False, "bytes"),
(b"", False, "bytes-empty"),
Expand Down Expand Up @@ -154,6 +209,8 @@ def test_is_array_like():
assert inference.is_array_like(Series([1, 2]))
assert inference.is_array_like(np.array(["a", "b"]))
assert inference.is_array_like(Index(["2016-01-01"]))
assert inference.is_array_like(np.array([2, 3]))
assert inference.is_array_like(MockNumpyLikeArray(np.array([2, 3])))

class DtypeList(list):
dtype = "special"
Expand All @@ -166,6 +223,23 @@ class DtypeList(list):
assert not inference.is_array_like(123)


def test_assert_almost_equal_distinct_zerodim_arrays():
    # Two separately constructed 0-d ndarrays holding the same value.
    # Kept under its own name: the parametrized test below previously reused
    # this name, which shadowed this function so it was never collected.
    tm.assert_almost_equal(np.array(2), np.array(2))


@pytest.mark.parametrize(
    "eg",
    (
        np.array(2),
        MockNumpyLikeArray(np.array(2)),
    ),
)
def test_assert_almost_equal(eg):
    # A 0-d ndarray and a 0-d duck array are each compared against themselves.
    tm.assert_almost_equal(eg, eg)


@pytest.mark.parametrize(
"inner",
[
Expand Down Expand Up @@ -1427,34 +1501,52 @@ def test_is_scalar_builtin_nonscalars(self):
assert not is_scalar(slice(None))
assert not is_scalar(Ellipsis)

@pytest.mark.parametrize(
    "start",
    (
        np.int64(1),
        np.float64(1.0),
        np.int32(1),
        np.complex64(2),
        np.object_("foobar"),
        np.str_("foobar"),
        # ``np.unicode_`` was an alias of ``np.str_`` and no longer exists in
        # NumPy 2.0; referencing it here would raise at collection time.
        np.bytes_(b"foobar"),
        np.datetime64("2014-01-01"),
        np.timedelta64(1, "h"),
    ),
)
@pytest.mark.parametrize("numpy_like", (True, False))
def test_is_scalar_numpy_array_scalars(self, start, numpy_like):
    """
    Check ``is_scalar`` on numpy scalar values, bare and duck-array wrapped.

    NOTE(review): with ``numpy_like=True`` the assertion is made on the
    ``MockNumpyLikeArray`` wrapper, not on the numpy scalar -- confirm that
    ``is_scalar`` is really expected to accept the wrapper.
    """
    if numpy_like:
        start = MockNumpyLikeArray(start)

    assert is_scalar(start)

@pytest.mark.parametrize(
    "zerodim",
    (
        np.array(1),
        np.array("foobar"),
        np.array(np.datetime64("2014-01-01")),
        np.array(np.timedelta64(1, "h")),
        np.array(np.datetime64("NaT")),
    ),
)
@pytest.mark.parametrize("numpy_like", (True, False))
def test_is_scalar_numpy_zerodim_arrays(self, zerodim, numpy_like):
    """
    Check that a zero-dimensional array is not a scalar but its item is.

    NOTE(review): with ``numpy_like=True`` this relies on
    ``lib.item_from_zerodim`` unwrapping the duck array -- confirm.
    """
    if numpy_like:
        zerodim = MockNumpyLikeArray(zerodim)

    assert not is_scalar(zerodim)
    assert is_scalar(lib.item_from_zerodim(zerodim))

@pytest.mark.parametrize(
    "start",
    (
        np.array([]),
        np.array([[]]),
        np.matrix("1; 2"),
    ),
)
@pytest.mark.parametrize("numpy_like", (True, False))
@pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
def test_is_scalar_numpy_arrays(self, start, numpy_like):
    """
    Check that arrays (including ``np.matrix``) are never treated as scalars.

    The ``numpy_like=False`` cases are exactly the three assertions of the
    former un-parametrized body, which has been folded into the parameters so
    that the decorators apply to a single definition taking ``start`` and
    ``numpy_like``.
    """
    if numpy_like:
        start = MockNumpyLikeArray(start)

    assert not is_scalar(start)

def test_is_scalar_pandas_scalars(self):
assert is_scalar(Timestamp("2014-01-01"))
Expand Down