Skip to content

[ArrayManager] TST: Enable extension tests #40348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ jobs:
pytest pandas/tests/computation/
pytest pandas/tests/config/
pytest pandas/tests/dtypes/
pytest pandas/tests/extension/
pytest pandas/tests/generic/
pytest pandas/tests/indexes/
pytest pandas/tests/io/test_* -m "not slow and not clipboard"
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,10 @@ def get_numeric_data(self, copy: bool = False) -> ArrayManager:
copy : bool, default False
Whether to copy the blocks
"""
return self._get_data_subset(lambda arr: is_numeric_dtype(arr.dtype))
return self._get_data_subset(
lambda arr: is_numeric_dtype(arr.dtype)
or getattr(arr.dtype, "_is_numeric", False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like is_numeric_dtype should catch this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it should, but it doesn't at the moment.

The above mimics the logic of the BlockManager (ExtensionBlock.is_numeric checks this attribute of the dtype), but it is certainly inconsistent. The consequence of the inconsistency is that, e.g., decimal is considered numeric for groupby operations, but is_numeric_dtype(decimal_dtype) will return False (I'm not sure where this would have a direct impact).

)

def copy(self: T, deep=True) -> T:
"""
Expand Down
13 changes: 10 additions & 3 deletions pandas/tests/extension/base/casting.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,21 @@ class BaseCastingTests(BaseExtensionTests):
def test_astype_object_series(self, all_data):
ser = pd.Series(all_data, name="A")
result = ser.astype(object)
assert isinstance(result._mgr.blocks[0], ObjectBlock)
assert result.dtype == np.dtype(object)
if hasattr(result._mgr, "blocks"):
assert isinstance(result._mgr.blocks[0], ObjectBlock)
assert isinstance(result._mgr.array, np.ndarray)
assert result._mgr.array.dtype == np.dtype(object)

def test_astype_object_frame(self, all_data):
df = pd.DataFrame({"A": all_data})

result = df.astype(object)
blk = result._data.blocks[0]
assert isinstance(blk, ObjectBlock), type(blk)
if hasattr(result._mgr, "blocks"):
blk = result._data.blocks[0]
assert isinstance(blk, ObjectBlock), type(blk)
assert isinstance(result._mgr.arrays[0], np.ndarray)
assert result._mgr.arrays[0].dtype == np.dtype(object)

# FIXME: these currently fail; dont leave commented-out
# check that we can compare the dtypes
Expand Down
17 changes: 12 additions & 5 deletions pandas/tests/extension/base/constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest

import pandas as pd
from pandas.api.extensions import ExtensionArray
from pandas.core.internals import ExtensionBlock
from pandas.tests.extension.base.base import BaseExtensionTests

Expand All @@ -24,13 +25,15 @@ def test_series_constructor(self, data):
result = pd.Series(data)
assert result.dtype == data.dtype
assert len(result) == len(data)
assert isinstance(result._mgr.blocks[0], ExtensionBlock)
assert result._mgr.blocks[0].values is data
if hasattr(result._mgr, "blocks"):
assert isinstance(result._mgr.blocks[0], ExtensionBlock)
assert result._mgr.array is data

# Series[EA] is unboxed / boxed correctly
result2 = pd.Series(result)
assert result2.dtype == data.dtype
assert isinstance(result2._mgr.blocks[0], ExtensionBlock)
if hasattr(result._mgr, "blocks"):
assert isinstance(result2._mgr.blocks[0], ExtensionBlock)

def test_series_constructor_no_data_with_index(self, dtype, na_value):
result = pd.Series(index=[1, 2, 3], dtype=dtype)
Expand Down Expand Up @@ -64,13 +67,17 @@ def test_dataframe_constructor_from_dict(self, data, from_series):
result = pd.DataFrame({"A": data})
assert result.dtypes["A"] == data.dtype
assert result.shape == (len(data), 1)
assert isinstance(result._mgr.blocks[0], ExtensionBlock)
if hasattr(result._mgr, "blocks"):
assert isinstance(result._mgr.blocks[0], ExtensionBlock)
assert isinstance(result._mgr.arrays[0], ExtensionArray)

def test_dataframe_from_series(self, data):
result = pd.DataFrame(pd.Series(data))
assert result.dtypes[0] == data.dtype
assert result.shape == (len(data), 1)
assert isinstance(result._mgr.blocks[0], ExtensionBlock)
if hasattr(result._mgr, "blocks"):
assert isinstance(result._mgr.blocks[0], ExtensionBlock)
assert isinstance(result._mgr.arrays[0], ExtensionArray)

def test_series_given_mismatched_index_raises(self, data):
msg = r"Length of values \(3\) does not match length of index \(5\)"
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,10 @@ def test_loc_len1(self, data):
# see GH-27785 take_nd with indexer of len 1 resulting in wrong ndim
df = pd.DataFrame({"A": data})
res = df.loc[[0], "A"]
assert res._mgr._block.ndim == 1
assert res.ndim == 1
assert res._mgr.arrays[0].ndim == 1
if hasattr(res._mgr, "blocks"):
assert res._mgr._block.ndim == 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is OK to just check res._mgr.ndim (or even just res.ndim).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a check for the ndim of the manager (so there is also a check for ArrayManager), but left the block ndim check in place, because the original bug was that only the block ndim was wrong (the manager ndim was correct).


def test_item(self, data):
# https://github.com/pandas-dev/pandas/pull/30175
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/extension/base/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def test_no_values_attribute(self, data):

def test_is_numeric_honored(self, data):
result = pd.Series(data)
assert result._mgr.blocks[0].is_numeric is data.dtype._is_numeric
if hasattr(result._mgr, "blocks"):
assert result._mgr.blocks[0].is_numeric is data.dtype._is_numeric

def test_isna_extension_array(self, data_missing):
# If your `isna` returns an ExtensionArray, you must also implement
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/extension/base/reshaping.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas.api.extensions import ExtensionArray
from pandas.core.internals import ExtensionBlock
from pandas.tests.extension.base.base import BaseExtensionTests

Expand All @@ -26,7 +29,9 @@ def test_concat(self, data, in_frame):
dtype = result.dtype

assert dtype == data.dtype
assert isinstance(result._mgr.blocks[0], ExtensionBlock)
if hasattr(result._mgr, "blocks"):
assert isinstance(result._mgr.blocks[0], ExtensionBlock)
assert isinstance(result._mgr.arrays[0], ExtensionArray)

@pytest.mark.parametrize("in_frame", [True, False])
def test_concat_all_na_block(self, data_missing, in_frame):
Expand Down Expand Up @@ -106,6 +111,7 @@ def test_concat_extension_arrays_copy_false(self, data, na_value):
result = pd.concat([df1, df2], axis=1, copy=False)
self.assert_frame_equal(result, expected)

@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) concat reindex
def test_concat_with_reindex(self, data):
# GH-33027
a = pd.DataFrame({"a": data[:5]})
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/extension/test_external_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
import pytest

from pandas._libs.internals import BlockPlacement
import pandas.util._test_decorators as td

import pandas as pd
from pandas.core.internals import BlockManager
from pandas.core.internals.blocks import ExtensionBlock

pytestmark = td.skip_array_manager_invalid_test


class CustomBlock(ExtensionBlock):

Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas.core.dtypes.dtypes import (
ExtensionDtype,
PandasDtype,
Expand All @@ -28,6 +30,9 @@
from pandas.core.internals import managers
from pandas.tests.extension import base

# TODO(ArrayManager) PandasArray
pytestmark = td.skip_array_manager_not_yet_implemented


def _extract_array_patched(obj):
if isinstance(obj, (pd.Index, pd.Series)):
Expand Down
12 changes: 7 additions & 5 deletions pandas/tests/extension/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,8 @@ def test_fillna_copy_frame(self, data_missing):
filled_val = df.iloc[0, 0]
result = df.fillna(filled_val)

assert df.values.base is not result.values.base
if hasattr(df._mgr, "blocks"):
assert df.values.base is not result.values.base
assert df.A._values.to_dense() is arr.to_dense()

def test_fillna_copy_series(self, data_missing):
Expand Down Expand Up @@ -362,18 +363,19 @@ def test_equals(self, data, na_value, as_series, box):
class TestCasting(BaseSparseTests, base.BaseCastingTests):
def test_astype_object_series(self, all_data):
# Unlike the base class, we do not expect the resulting Block
# to be ObjectBlock
# to be ObjectBlock / resulting array to be np.dtype("object")
ser = pd.Series(all_data, name="A")
result = ser.astype(object)
assert is_object_dtype(result._data.blocks[0].dtype)
assert is_object_dtype(result.dtype)
assert is_object_dtype(result._mgr.array.dtype)

def test_astype_object_frame(self, all_data):
# Unlike the base class, we do not expect the resulting Block
# to be ObjectBlock
# to be ObjectBlock / resulting array to be np.dtype("object")
df = pd.DataFrame({"A": all_data})

result = df.astype(object)
assert is_object_dtype(result._data.blocks[0].dtype)
assert is_object_dtype(result._mgr.arrays[0].dtype)

# FIXME: these currently fail; dont leave commented-out
# check that we can compare the dtypes
Expand Down