Skip to content

CLN: unify NumpyBlock, ObjectBlock, and NumericBlock #52817

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 24 additions & 4 deletions pandas/core/internals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
Block,
DatetimeTZBlock,
ExtensionBlock,
NumericBlock,
ObjectBlock,
)
from pandas.core.internals.concat import concatenate_managers
from pandas.core.internals.managers import (
Expand All @@ -23,10 +21,8 @@

__all__ = [
"Block",
"NumericBlock",
"DatetimeTZBlock",
"ExtensionBlock",
"ObjectBlock",
"make_block",
"DataManager",
"ArrayManager",
Expand All @@ -38,3 +34,27 @@
# this is preserved here for downstream compatibility (GH-33892)
"create_block_manager_from_blocks",
]


def __getattr__(name: str):
    """
    Resolve deprecated block classes on attribute access of this module.

    Only ``NumericBlock`` and ``ObjectBlock`` are served here. Accessing
    either one emits a ``DeprecationWarning`` and then returns the class of
    the same name imported from ``pandas.core.internals.blocks``.

    Parameters
    ----------
    name : str
        The attribute that was looked up on the module and not found
        through the normal mechanism.

    Returns
    -------
    type
        The ``NumericBlock`` or ``ObjectBlock`` class.

    Raises
    ------
    AttributeError
        If ``name`` is anything other than the two deprecated class names.
    """
    import warnings

    from pandas.util._exceptions import find_stack_level

    # Guard clause: anything that is not one of the deprecated aliases is a
    # genuine missing attribute.
    if name not in ("NumericBlock", "ObjectBlock"):
        raise AttributeError(
            f"module 'pandas.core.internals' has no attribute '{name}'"
        )

    warnings.warn(
        f"{name} is deprecated and will be removed in a future version. "
        "Use public APIs instead.",
        DeprecationWarning,
        stacklevel=find_stack_level(),
    )

    # The classes are imported lazily, only after the warning has been issued.
    if name == "NumericBlock":
        from pandas.core.internals.blocks import NumericBlock

        return NumericBlock

    from pandas.core.internals.blocks import ObjectBlock

    return ObjectBlock
109 changes: 53 additions & 56 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,13 +469,36 @@ def convert(
using_cow: bool = False,
) -> list[Block]:
"""
attempt to coerce any object types to better types return a copy
of the block (if copy = True) by definition we are not an ObjectBlock
here!
Attempt to coerce any object types to better types. Return a copy
of the block (if copy = True).
"""
if not copy and using_cow:
return [self.copy(deep=False)]
return [self.copy()] if copy else [self]
if not self.is_object:
if not copy and using_cow:
return [self.copy(deep=False)]
return [self.copy()] if copy else [self]

if self.ndim != 1 and self.shape[0] != 1:
return self.split_and_operate(Block.convert, copy=copy, using_cow=using_cow)

values = self.values
if values.ndim == 2:
# the check above ensures we only get here with values.shape[0] == 1,
# avoid doing .ravel as that might make a copy
values = values[0]

res_values = lib.maybe_convert_objects(
values, # type: ignore[arg-type]
convert_non_numeric=True,
)
refs = None
if copy and res_values is values:
res_values = values.copy()
elif res_values is values and using_cow:
refs = self.refs

res_values = ensure_block_shape(res_values, self.ndim)
res_values = maybe_coerce_values(res_values)
return [self.make_block(res_values, refs=refs)]

# ---------------------------------------------------------------------
# Array-Like Methods
Expand Down Expand Up @@ -680,7 +703,7 @@ def _replace_regex(
List[Block]
"""
if not self._can_hold_element(to_replace):
# i.e. only ObjectBlock, but could in principle include a
# i.e. only if self.is_object is True, but could in principle include a
# String ExtensionBlock
if using_cow:
return [self.copy(deep=False)]
Expand Down Expand Up @@ -1273,7 +1296,7 @@ def fillna(
) -> list[Block]:
"""
fillna on the block with the value. If we fail, then convert to
ObjectBlock and try again
block to hold objects instead and try again
"""
# Caller is responsible for validating limit; if int it is strictly positive
inplace = validate_bool_kwarg(inplace, "inplace")
Expand Down Expand Up @@ -2064,7 +2087,7 @@ def _unstack(
needs_masking: npt.NDArray[np.bool_],
):
# ExtensionArray-safe unstack.
# We override ObjectBlock._unstack, which unstacks directly on the
# We override Block._unstack, which unstacks directly on the
# values of the array. For EA-backed blocks, this would require
# converting to a 2-D ndarray of objects.
# Instead, we unstack an ndarray of integer positions, followed by
Expand Down Expand Up @@ -2100,6 +2123,7 @@ def _unstack(

class NumpyBlock(libinternals.NumpyBlock, Block):
values: np.ndarray
__slots__ = ()

@property
def is_view(self) -> bool:
Expand All @@ -2118,10 +2142,28 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
def values_for_json(self) -> np.ndarray:
return self.values

@cache_readonly
def is_numeric(self) -> bool: # type: ignore[override]
dtype = self.values.dtype
kind = dtype.kind

return kind in "fciub"

@cache_readonly
def is_object(self) -> bool: # type: ignore[override]
return self.values.dtype.kind == "O"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are there dtypes other than object-dtype that have kind=="O"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.



class NumericBlock(NumpyBlock):
# this Block type is kept for backwards-compatibility
# TODO(3.0): delete and remove deprecation in __init__.py.
__slots__ = ()


class ObjectBlock(NumpyBlock):
# this Block type is kept for backwards-compatibility
# TODO(3.0): delete and remove deprecation in __init__.py.
__slots__ = ()
is_numeric = True


class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock):
Expand Down Expand Up @@ -2257,49 +2299,6 @@ class DatetimeTZBlock(DatetimeLikeBlock):
values_for_json = NDArrayBackedExtensionBlock.values_for_json


class ObjectBlock(NumpyBlock):
__slots__ = ()
is_object = True

@maybe_split
def convert(
self,
*,
copy: bool = True,
using_cow: bool = False,
) -> list[Block]:
"""
attempt to cast any object types to better types return a copy of
the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
"""
if self.dtype != _dtype_obj:
# GH#50067 this should be impossible in ObjectBlock, but until
# that is fixed, we short-circuit here.
if using_cow:
return [self.copy(deep=False)]
return [self]

values = self.values
if values.ndim == 2:
# maybe_split ensures we only get here with values.shape[0] == 1,
# avoid doing .ravel as that might make a copy
values = values[0]

res_values = lib.maybe_convert_objects(
values,
convert_non_numeric=True,
)
refs = None
if copy and res_values is values:
res_values = values.copy()
elif res_values is values and using_cow:
refs = self.refs

res_values = ensure_block_shape(res_values, self.ndim)
res_values = maybe_coerce_values(res_values)
return [self.make_block(res_values, refs=refs)]


# -----------------------------------------------------------------
# Constructor Helpers

Expand Down Expand Up @@ -2358,10 +2357,8 @@ def get_block_type(dtype: DtypeObj) -> type[Block]:
kind = dtype.kind
if kind in "Mm":
return DatetimeLikeBlock
elif kind in "fciub":
return NumericBlock

return ObjectBlock
return NumpyBlock


def new_block_2d(
Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/extension/base/casting.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pandas.util._test_decorators as td

import pandas as pd
from pandas.core.internals import ObjectBlock
from pandas.core.internals.blocks import NumpyBlock
from pandas.tests.extension.base.base import BaseExtensionTests


Expand All @@ -16,7 +16,9 @@ def test_astype_object_series(self, all_data):
result = ser.astype(object)
assert result.dtype == np.dtype(object)
if hasattr(result._mgr, "blocks"):
assert isinstance(result._mgr.blocks[0], ObjectBlock)
blk = result._mgr.blocks[0]
assert isinstance(blk, NumpyBlock)
assert blk.is_object
assert isinstance(result._mgr.array, np.ndarray)
assert result._mgr.array.dtype == np.dtype(object)

Expand All @@ -26,7 +28,8 @@ def test_astype_object_frame(self, all_data):
result = df.astype(object)
if hasattr(result._mgr, "blocks"):
blk = result._mgr.blocks[0]
assert isinstance(blk, ObjectBlock), type(blk)
assert isinstance(blk, NumpyBlock), type(blk)
assert blk.is_object
assert isinstance(result._mgr.arrays[0], np.ndarray)
assert result._mgr.arrays[0].dtype == np.dtype(object)

Expand Down
14 changes: 7 additions & 7 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@
option_context,
)
import pandas._testing as tm
from pandas.core.internals import (
NumericBlock,
ObjectBlock,
)
from pandas.core.internals.blocks import NumpyBlock

# Segregated collection of methods that require the BlockManager internal data
# structure
Expand Down Expand Up @@ -387,7 +384,8 @@ def test_constructor_no_pandas_array(self):
result = DataFrame({"A": arr})
expected = DataFrame({"A": [1, 2, 3]})
tm.assert_frame_equal(result, expected)
assert isinstance(result._mgr.blocks[0], NumericBlock)
assert isinstance(result._mgr.blocks[0], NumpyBlock)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

check is_numeric here?

assert result._mgr.blocks[0].is_numeric

def test_add_column_with_pandas_array(self):
# GH 26390
Expand All @@ -400,8 +398,10 @@ def test_add_column_with_pandas_array(self):
"c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)),
}
)
assert type(df["c"]._mgr.blocks[0]) == ObjectBlock
assert type(df2["c"]._mgr.blocks[0]) == ObjectBlock
assert type(df["c"]._mgr.blocks[0]) == NumpyBlock
assert df["c"]._mgr.blocks[0].is_object
assert type(df2["c"]._mgr.blocks[0]) == NumpyBlock
assert df2["c"]._mgr.blocks[0].is_object
tm.assert_frame_equal(df, df2)


Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/internals/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@ def test_namespace():
]
expected = [
"Block",
"NumericBlock",
"DatetimeTZBlock",
"ExtensionBlock",
"ObjectBlock",
"make_block",
"DataManager",
"ArrayManager",
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
IntervalArray,
period_array,
)
from pandas.core.internals.blocks import NumericBlock
from pandas.core.internals.blocks import NumpyBlock


class TestSeriesConstructors:
Expand Down Expand Up @@ -2098,7 +2098,8 @@ def test_constructor_no_pandas_array(self, using_array_manager):
result = Series(ser.array)
tm.assert_series_equal(ser, result)
if not using_array_manager:
assert isinstance(result._mgr.blocks[0], NumericBlock)
assert isinstance(result._mgr.blocks[0], NumpyBlock)
assert result._mgr.blocks[0].is_numeric

@td.skip_array_manager_invalid_test
def test_from_array(self):
Expand Down