-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
CLN: unify NumpyBlock, ObjectBlock, and NumericBlock #52817
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
db60b58
4583b47
9ae67ef
e827f7b
f85e1a1
6d07763
e52b6e8
e6f9b3f
f6d25ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -469,13 +469,36 @@ def convert( | |
using_cow: bool = False, | ||
) -> list[Block]: | ||
""" | ||
attempt to coerce any object types to better types return a copy | ||
of the block (if copy = True) by definition we are not an ObjectBlock | ||
here! | ||
Attempt to coerce any object types to better types. Return a copy | ||
of the block (if copy = True). | ||
""" | ||
if not copy and using_cow: | ||
return [self.copy(deep=False)] | ||
return [self.copy()] if copy else [self] | ||
if not self.is_object: | ||
if not copy and using_cow: | ||
return [self.copy(deep=False)] | ||
return [self.copy()] if copy else [self] | ||
|
||
if self.ndim != 1 and self.shape[0] != 1: | ||
return self.split_and_operate(Block.convert, copy=copy, using_cow=using_cow) | ||
|
||
values = self.values | ||
if values.ndim == 2: | ||
# the check above ensures we only get here with values.shape[0] == 1, | ||
# avoid doing .ravel as that might make a copy | ||
values = values[0] | ||
|
||
res_values = lib.maybe_convert_objects( | ||
values, # type: ignore[arg-type] | ||
convert_non_numeric=True, | ||
) | ||
refs = None | ||
if copy and res_values is values: | ||
res_values = values.copy() | ||
elif res_values is values and using_cow: | ||
refs = self.refs | ||
|
||
res_values = ensure_block_shape(res_values, self.ndim) | ||
res_values = maybe_coerce_values(res_values) | ||
return [self.make_block(res_values, refs=refs)] | ||
|
||
# --------------------------------------------------------------------- | ||
# Array-Like Methods | ||
|
@@ -680,7 +703,7 @@ def _replace_regex( | |
List[Block] | ||
""" | ||
if not self._can_hold_element(to_replace): | ||
# i.e. only ObjectBlock, but could in principle include a | ||
# i.e. only if self.is_object is True, but could in principle include a | ||
# String ExtensionBlock | ||
if using_cow: | ||
return [self.copy(deep=False)] | ||
|
@@ -1273,7 +1296,7 @@ def fillna( | |
) -> list[Block]: | ||
""" | ||
fillna on the block with the value. If we fail, then convert to | ||
ObjectBlock and try again | ||
a block that can hold objects instead and try again | ||
""" | ||
# Caller is responsible for validating limit; if int it is strictly positive | ||
inplace = validate_bool_kwarg(inplace, "inplace") | ||
|
@@ -2064,7 +2087,7 @@ def _unstack( | |
needs_masking: npt.NDArray[np.bool_], | ||
): | ||
# ExtensionArray-safe unstack. | ||
# We override ObjectBlock._unstack, which unstacks directly on the | ||
# We override Block._unstack, which unstacks directly on the | ||
# values of the array. For EA-backed blocks, this would require | ||
# converting to a 2-D ndarray of objects. | ||
# Instead, we unstack an ndarray of integer positions, followed by | ||
|
@@ -2100,6 +2123,7 @@ def _unstack( | |
|
||
class NumpyBlock(libinternals.NumpyBlock, Block): | ||
values: np.ndarray | ||
__slots__ = () | ||
|
||
@property | ||
def is_view(self) -> bool: | ||
|
@@ -2118,10 +2142,28 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: | |
def values_for_json(self) -> np.ndarray: | ||
return self.values | ||
|
||
@cache_readonly | ||
def is_numeric(self) -> bool: # type: ignore[override] | ||
dtype = self.values.dtype | ||
kind = dtype.kind | ||
|
||
return kind in "fciub" | ||
|
||
@cache_readonly | ||
def is_object(self) -> bool: # type: ignore[override] | ||
return self.values.dtype.kind == "O" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reviewer comment: Are there dtypes other than object-dtype that have kind == "O"? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reply: Nope. FWIW, the letter codes are defined as an enum here: https://github.com/numpy/numpy/blob/f45f692abac62265b760c0a249e8b417707c47d1/numpy/core/include/numpy/ndarraytypes.h#L94-L140 |
||
|
||
|
||
class NumericBlock(NumpyBlock): | ||
# this Block type is kept for backwards-compatibility | ||
# TODO(3.0): delete and remove deprecation in __init__.py. | ||
__slots__ = () | ||
|
||
|
||
class ObjectBlock(NumpyBlock): | ||
# this Block type is kept for backwards-compatibility | ||
ngoldbaum marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# TODO(3.0): delete and remove deprecation in __init__.py. | ||
__slots__ = () | ||
is_numeric = True | ||
|
||
|
||
class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock): | ||
|
@@ -2257,49 +2299,6 @@ class DatetimeTZBlock(DatetimeLikeBlock): | |
values_for_json = NDArrayBackedExtensionBlock.values_for_json | ||
|
||
|
||
class ObjectBlock(NumpyBlock): | ||
__slots__ = () | ||
is_object = True | ||
|
||
@maybe_split | ||
def convert( | ||
self, | ||
*, | ||
copy: bool = True, | ||
using_cow: bool = False, | ||
) -> list[Block]: | ||
""" | ||
attempt to cast any object types to better types return a copy of | ||
the block (if copy = True) by definition we ARE an ObjectBlock!!!!! | ||
""" | ||
if self.dtype != _dtype_obj: | ||
# GH#50067 this should be impossible in ObjectBlock, but until | ||
# that is fixed, we short-circuit here. | ||
if using_cow: | ||
return [self.copy(deep=False)] | ||
return [self] | ||
|
||
values = self.values | ||
if values.ndim == 2: | ||
# maybe_split ensures we only get here with values.shape[0] == 1, | ||
# avoid doing .ravel as that might make a copy | ||
values = values[0] | ||
|
||
res_values = lib.maybe_convert_objects( | ||
values, | ||
convert_non_numeric=True, | ||
) | ||
refs = None | ||
if copy and res_values is values: | ||
res_values = values.copy() | ||
elif res_values is values and using_cow: | ||
refs = self.refs | ||
|
||
res_values = ensure_block_shape(res_values, self.ndim) | ||
res_values = maybe_coerce_values(res_values) | ||
return [self.make_block(res_values, refs=refs)] | ||
|
||
|
||
# ----------------------------------------------------------------- | ||
# Constructor Helpers | ||
|
||
|
@@ -2358,10 +2357,8 @@ def get_block_type(dtype: DtypeObj) -> type[Block]: | |
kind = dtype.kind | ||
if kind in "Mm": | ||
return DatetimeLikeBlock | ||
elif kind in "fciub": | ||
return NumericBlock | ||
|
||
return ObjectBlock | ||
return NumpyBlock | ||
|
||
|
||
def new_block_2d( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,10 +20,7 @@ | |
option_context, | ||
) | ||
import pandas._testing as tm | ||
from pandas.core.internals import ( | ||
NumericBlock, | ||
ObjectBlock, | ||
) | ||
from pandas.core.internals.blocks import NumpyBlock | ||
|
||
# Segregated collection of methods that require the BlockManager internal data | ||
# structure | ||
|
@@ -387,7 +384,8 @@ def test_constructor_no_pandas_array(self): | |
result = DataFrame({"A": arr}) | ||
expected = DataFrame({"A": [1, 2, 3]}) | ||
tm.assert_frame_equal(result, expected) | ||
assert isinstance(result._mgr.blocks[0], NumericBlock) | ||
assert isinstance(result._mgr.blocks[0], NumpyBlock) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reviewer comment: Check `is_numeric` here? |
||
assert result._mgr.blocks[0].is_numeric | ||
|
||
def test_add_column_with_pandas_array(self): | ||
# GH 26390 | ||
|
@@ -400,8 +398,10 @@ def test_add_column_with_pandas_array(self): | |
"c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)), | ||
} | ||
) | ||
assert type(df["c"]._mgr.blocks[0]) == ObjectBlock | ||
assert type(df2["c"]._mgr.blocks[0]) == ObjectBlock | ||
assert type(df["c"]._mgr.blocks[0]) == NumpyBlock | ||
assert df["c"]._mgr.blocks[0].is_object | ||
assert type(df2["c"]._mgr.blocks[0]) == NumpyBlock | ||
assert df2["c"]._mgr.blocks[0].is_object | ||
tm.assert_frame_equal(df, df2) | ||
|
||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.