Skip to content

Commit c95446d

Browse files
ngoldbaum authored and topper-123 committed
CLN: unify NumpyBlock, ObjectBlock, and NumericBlock (pandas-dev#52817)
* CLN: unify NumpyBlock, ObjectBlock, and NumericBlock
* CLN: respond to review comments
* CLN: deprecate ObjectBlock and NumericBlock
* CLN: appease mypy
* CLN: remove out-of-date reference to maybe_split
* CLN: respond to review comments
* CLN: remove NumpyBlock from the semi-public API
* CLN: test for is_numeric in block internals tests
1 parent 52e68bc commit c95446d

File tree

6 files changed

+93
-74
lines changed

6 files changed

+93
-74
lines changed

pandas/core/internals/__init__.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
Block,
1212
DatetimeTZBlock,
1313
ExtensionBlock,
14-
NumericBlock,
15-
ObjectBlock,
1614
)
1715
from pandas.core.internals.concat import concatenate_managers
1816
from pandas.core.internals.managers import (
@@ -23,10 +21,8 @@
2321

2422
__all__ = [
2523
"Block",
26-
"NumericBlock",
2724
"DatetimeTZBlock",
2825
"ExtensionBlock",
29-
"ObjectBlock",
3026
"make_block",
3127
"DataManager",
3228
"ArrayManager",
@@ -38,3 +34,27 @@
3834
# this is preserved here for downstream compatibility (GH-33892)
3935
"create_block_manager_from_blocks",
4036
]
37+
38+
39+
def __getattr__(name: str):
    """
    Resolve deprecated attributes of this module on demand.

    Python calls a module-level ``__getattr__`` only when normal attribute
    lookup on the module fails, so this runs solely for names that are no
    longer defined here.

    Parameters
    ----------
    name : str
        The attribute being looked up on the module.

    Returns
    -------
    type
        The ``NumericBlock`` or ``ObjectBlock`` class imported from
        ``pandas.core.internals.blocks``, after a ``DeprecationWarning``
        has been issued.

    Raises
    ------
    AttributeError
        If ``name`` is not one of the deprecated block class names.
    """
    # Function-scope imports: they are only needed on this rarely used path.
    import warnings

    from pandas.util._exceptions import find_stack_level

    # Anything other than the two deprecated names is a genuine miss.
    if name not in ("NumericBlock", "ObjectBlock"):
        raise AttributeError(
            f"module 'pandas.core.internals' has no attribute '{name}'"
        )

    warnings.warn(
        f"{name} is deprecated and will be removed in a future version. "
        "Use public APIs instead.",
        DeprecationWarning,
        stacklevel=find_stack_level(),
    )

    if name == "ObjectBlock":
        from pandas.core.internals.blocks import ObjectBlock

        return ObjectBlock

    from pandas.core.internals.blocks import NumericBlock

    return NumericBlock

pandas/core/internals/blocks.py

Lines changed: 53 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -468,13 +468,36 @@ def convert(
468468
using_cow: bool = False,
469469
) -> list[Block]:
470470
"""
471-
attempt to coerce any object types to better types return a copy
472-
of the block (if copy = True) by definition we are not an ObjectBlock
473-
here!
471+
Attempt to coerce any object types to better types. Return a copy
472+
of the block (if copy = True).
474473
"""
475-
if not copy and using_cow:
476-
return [self.copy(deep=False)]
477-
return [self.copy()] if copy else [self]
474+
if not self.is_object:
475+
if not copy and using_cow:
476+
return [self.copy(deep=False)]
477+
return [self.copy()] if copy else [self]
478+
479+
if self.ndim != 1 and self.shape[0] != 1:
480+
return self.split_and_operate(Block.convert, copy=copy, using_cow=using_cow)
481+
482+
values = self.values
483+
if values.ndim == 2:
484+
# the check above ensures we only get here with values.shape[0] == 1,
485+
# avoid doing .ravel as that might make a copy
486+
values = values[0]
487+
488+
res_values = lib.maybe_convert_objects(
489+
values, # type: ignore[arg-type]
490+
convert_non_numeric=True,
491+
)
492+
refs = None
493+
if copy and res_values is values:
494+
res_values = values.copy()
495+
elif res_values is values and using_cow:
496+
refs = self.refs
497+
498+
res_values = ensure_block_shape(res_values, self.ndim)
499+
res_values = maybe_coerce_values(res_values)
500+
return [self.make_block(res_values, refs=refs)]
478501

479502
# ---------------------------------------------------------------------
480503
# Array-Like Methods
@@ -679,7 +702,7 @@ def _replace_regex(
679702
List[Block]
680703
"""
681704
if not self._can_hold_element(to_replace):
682-
# i.e. only ObjectBlock, but could in principle include a
705+
# i.e. only if self.is_object is True, but could in principle include a
683706
# String ExtensionBlock
684707
if using_cow:
685708
return [self.copy(deep=False)]
@@ -1272,7 +1295,7 @@ def fillna(
12721295
) -> list[Block]:
12731296
"""
12741297
fillna on the block with the value. If we fail, then convert to
1275-
ObjectBlock and try again
1298+
block to hold objects instead and try again
12761299
"""
12771300
# Caller is responsible for validating limit; if int it is strictly positive
12781301
inplace = validate_bool_kwarg(inplace, "inplace")
@@ -2063,7 +2086,7 @@ def _unstack(
20632086
needs_masking: npt.NDArray[np.bool_],
20642087
):
20652088
# ExtensionArray-safe unstack.
2066-
# We override ObjectBlock._unstack, which unstacks directly on the
2089+
# We override Block._unstack, which unstacks directly on the
20672090
# values of the array. For EA-backed blocks, this would require
20682091
# converting to a 2-D ndarray of objects.
20692092
# Instead, we unstack an ndarray of integer positions, followed by
@@ -2099,6 +2122,7 @@ def _unstack(
20992122

21002123
class NumpyBlock(libinternals.NumpyBlock, Block):
21012124
values: np.ndarray
2125+
__slots__ = ()
21022126

21032127
@property
21042128
def is_view(self) -> bool:
@@ -2117,10 +2141,28 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
21172141
def values_for_json(self) -> np.ndarray:
21182142
return self.values
21192143

2144+
@cache_readonly
2145+
def is_numeric(self) -> bool: # type: ignore[override]
2146+
dtype = self.values.dtype
2147+
kind = dtype.kind
2148+
2149+
return kind in "fciub"
2150+
2151+
@cache_readonly
2152+
def is_object(self) -> bool: # type: ignore[override]
2153+
return self.values.dtype.kind == "O"
2154+
21202155

21212156
class NumericBlock(NumpyBlock):
2157+
# this Block type is kept for backwards-compatibility
2158+
# TODO(3.0): delete and remove deprecation in __init__.py.
2159+
__slots__ = ()
2160+
2161+
2162+
class ObjectBlock(NumpyBlock):
2163+
# this Block type is kept for backwards-compatibility
2164+
# TODO(3.0): delete and remove deprecation in __init__.py.
21222165
__slots__ = ()
2123-
is_numeric = True
21242166

21252167

21262168
class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock):
@@ -2256,49 +2298,6 @@ class DatetimeTZBlock(DatetimeLikeBlock):
22562298
values_for_json = NDArrayBackedExtensionBlock.values_for_json
22572299

22582300

2259-
class ObjectBlock(NumpyBlock):
2260-
__slots__ = ()
2261-
is_object = True
2262-
2263-
@maybe_split
2264-
def convert(
2265-
self,
2266-
*,
2267-
copy: bool = True,
2268-
using_cow: bool = False,
2269-
) -> list[Block]:
2270-
"""
2271-
attempt to cast any object types to better types return a copy of
2272-
the block (if copy = True) by definition we ARE an ObjectBlock!!!!!
2273-
"""
2274-
if self.dtype != _dtype_obj:
2275-
# GH#50067 this should be impossible in ObjectBlock, but until
2276-
# that is fixed, we short-circuit here.
2277-
if using_cow:
2278-
return [self.copy(deep=False)]
2279-
return [self]
2280-
2281-
values = self.values
2282-
if values.ndim == 2:
2283-
# maybe_split ensures we only get here with values.shape[0] == 1,
2284-
# avoid doing .ravel as that might make a copy
2285-
values = values[0]
2286-
2287-
res_values = lib.maybe_convert_objects(
2288-
values,
2289-
convert_non_numeric=True,
2290-
)
2291-
refs = None
2292-
if copy and res_values is values:
2293-
res_values = values.copy()
2294-
elif res_values is values and using_cow:
2295-
refs = self.refs
2296-
2297-
res_values = ensure_block_shape(res_values, self.ndim)
2298-
res_values = maybe_coerce_values(res_values)
2299-
return [self.make_block(res_values, refs=refs)]
2300-
2301-
23022301
# -----------------------------------------------------------------
23032302
# Constructor Helpers
23042303

@@ -2357,10 +2356,8 @@ def get_block_type(dtype: DtypeObj) -> type[Block]:
23572356
kind = dtype.kind
23582357
if kind in "Mm":
23592358
return DatetimeLikeBlock
2360-
elif kind in "fciub":
2361-
return NumericBlock
23622359

2363-
return ObjectBlock
2360+
return NumpyBlock
23642361

23652362

23662363
def new_block_2d(

pandas/tests/extension/base/casting.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pandas.util._test_decorators as td
55

66
import pandas as pd
7-
from pandas.core.internals import ObjectBlock
7+
from pandas.core.internals.blocks import NumpyBlock
88
from pandas.tests.extension.base.base import BaseExtensionTests
99

1010

@@ -16,7 +16,9 @@ def test_astype_object_series(self, all_data):
1616
result = ser.astype(object)
1717
assert result.dtype == np.dtype(object)
1818
if hasattr(result._mgr, "blocks"):
19-
assert isinstance(result._mgr.blocks[0], ObjectBlock)
19+
blk = result._mgr.blocks[0]
20+
assert isinstance(blk, NumpyBlock)
21+
assert blk.is_object
2022
assert isinstance(result._mgr.array, np.ndarray)
2123
assert result._mgr.array.dtype == np.dtype(object)
2224

@@ -26,7 +28,8 @@ def test_astype_object_frame(self, all_data):
2628
result = df.astype(object)
2729
if hasattr(result._mgr, "blocks"):
2830
blk = result._mgr.blocks[0]
29-
assert isinstance(blk, ObjectBlock), type(blk)
31+
assert isinstance(blk, NumpyBlock), type(blk)
32+
assert blk.is_object
3033
assert isinstance(result._mgr.arrays[0], np.ndarray)
3134
assert result._mgr.arrays[0].dtype == np.dtype(object)
3235

pandas/tests/frame/test_block_internals.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,7 @@
2020
option_context,
2121
)
2222
import pandas._testing as tm
23-
from pandas.core.internals import (
24-
NumericBlock,
25-
ObjectBlock,
26-
)
23+
from pandas.core.internals.blocks import NumpyBlock
2724

2825
# Segregated collection of methods that require the BlockManager internal data
2926
# structure
@@ -387,7 +384,8 @@ def test_constructor_no_pandas_array(self):
387384
result = DataFrame({"A": arr})
388385
expected = DataFrame({"A": [1, 2, 3]})
389386
tm.assert_frame_equal(result, expected)
390-
assert isinstance(result._mgr.blocks[0], NumericBlock)
387+
assert isinstance(result._mgr.blocks[0], NumpyBlock)
388+
assert result._mgr.blocks[0].is_numeric
391389

392390
def test_add_column_with_pandas_array(self):
393391
# GH 26390
@@ -400,8 +398,10 @@ def test_add_column_with_pandas_array(self):
400398
"c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)),
401399
}
402400
)
403-
assert type(df["c"]._mgr.blocks[0]) == ObjectBlock
404-
assert type(df2["c"]._mgr.blocks[0]) == ObjectBlock
401+
assert type(df["c"]._mgr.blocks[0]) == NumpyBlock
402+
assert df["c"]._mgr.blocks[0].is_object
403+
assert type(df2["c"]._mgr.blocks[0]) == NumpyBlock
404+
assert df2["c"]._mgr.blocks[0].is_object
405405
tm.assert_frame_equal(df, df2)
406406

407407

pandas/tests/internals/test_api.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,8 @@ def test_namespace():
2727
]
2828
expected = [
2929
"Block",
30-
"NumericBlock",
3130
"DatetimeTZBlock",
3231
"ExtensionBlock",
33-
"ObjectBlock",
3432
"make_block",
3533
"DataManager",
3634
"ArrayManager",

pandas/tests/series/test_constructors.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
IntervalArray,
4747
period_array,
4848
)
49-
from pandas.core.internals.blocks import NumericBlock
49+
from pandas.core.internals.blocks import NumpyBlock
5050

5151

5252
class TestSeriesConstructors:
@@ -2098,7 +2098,8 @@ def test_constructor_no_pandas_array(self, using_array_manager):
20982098
result = Series(ser.array)
20992099
tm.assert_series_equal(ser, result)
21002100
if not using_array_manager:
2101-
assert isinstance(result._mgr.blocks[0], NumericBlock)
2101+
assert isinstance(result._mgr.blocks[0], NumpyBlock)
2102+
assert result._mgr.blocks[0].is_numeric
21022103

21032104
@td.skip_array_manager_invalid_test
21042105
def test_from_array(self):

0 commit comments

Comments
 (0)