Skip to content

REF: move actual lookup and dispatch to array_op from frame into internals #39772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
38 changes: 10 additions & 28 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6091,14 +6091,10 @@ def _dispatch_frame_op(self, right, func, axis: Optional[int] = None):
-------
DataFrame
"""
# Get the appropriate array-op to apply to each column/block's values.
array_op = ops.get_array_op(func)

right = lib.item_from_zerodim(right)
if not is_list_like(right):
# i.e. scalar, faster than checking np.ndim(right) == 0
bm = self._mgr.apply(array_op, right=right)
return type(self)(bm)
bm = self._mgr.operate_scalar(right, func)

elif isinstance(right, DataFrame):
assert self.index.equals(right.index)
Expand All @@ -6108,37 +6104,23 @@ def _dispatch_frame_op(self, right, func, axis: Optional[int] = None):
# _frame_arith_method_with_reindex

# TODO operate_blockwise expects a manager of the same type
bm = self._mgr.operate_blockwise(
right._mgr, array_op # type: ignore[arg-type]
)
return type(self)(bm)

elif isinstance(right, Series) and axis == 1:
# axis=1 means we want to operate row-by-row
assert right.index.equals(self.columns)

right = right._values
# maybe_align_as_frame ensures we do not have an ndarray here
assert not isinstance(right, np.ndarray)

arrays = [
array_op(_left, _right)
for _left, _right in zip(self._iter_column_arrays(), right)
]
bm = self._mgr.operate_manager(right._mgr, func) # type: ignore[arg-type]

elif isinstance(right, Series):
assert right.index.equals(self.index) # Handle other cases later
right = right._values
if axis == 1:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assert right.index.equals(self.get_axis(axis))

# axis=1 means we want to operate row-by-row
assert right.index.equals(self.columns)
else:
assert right.index.equals(self.index) # Handle other cases later

arrays = [array_op(left, right) for left in self._iter_column_arrays()]
right = right._values
bm = self._mgr.operate_array(right, func, axis)

else:
# Remaining cases have less-obvious dispatch rules
raise NotImplementedError(right)

return type(self)._from_arrays(
arrays, self.columns, self.index, verify_integrity=False
)
return type(self)(bm)

def _combine_frame(self, other: DataFrame, func, fill_value=None):
# at this point we have `self._indexed_same(other)`
Expand Down
32 changes: 32 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from pandas.core.dtypes.generic import ABCDataFrame, ABCPandasArray, ABCSeries
from pandas.core.dtypes.missing import array_equals, isna

from pandas.core import ops
import pandas.core.algorithms as algos
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.construction import extract_array
Expand Down Expand Up @@ -366,6 +367,37 @@ def reduce(
new_mgr = type(self).from_blocks(res_blocks, [self.items, index])
return new_mgr, indexer

def operate_scalar(self, other, op) -> BlockManager:
    """
    Apply a binary operation between this manager's values and a scalar.

    Parameters
    ----------
    other : scalar
        Right-hand operand; passed to ``self.apply`` as the ``right`` keyword.
    op : callable
        Operator that ``ops.get_array_op`` resolves to an array-level function.

    Returns
    -------
    BlockManager
        The result of ``self.apply`` for the resolved array-level function.
    """
    # Look up the array-level implementation and dispatch it in one step.
    return self.apply(ops.get_array_op(op), right=other)

def operate_array(self, other: ArrayLike, op, axis: int) -> BlockManager:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that I currently add three specialized methods (operate_scalar, operate_array and operate_manager), one for each of the three cases that are left to handle here.
It could of course also be a single operate method that combines the three with some if/elif/else checks, but those checks would partly duplicate the ones already done in _dispatch_frame_op, where these methods are called.

"""
Apply a binary operation between each column of this manager and an array.

Parameters
----------
other : ArrayLike
    Right-hand operand.
op : callable
    Operator that ``ops.get_array_op`` resolves to an array-level function.
axis : int
    If 1, the i-th element of ``other`` is combined with the i-th column.
    For any other value, all of ``other`` is combined with every column.

Returns
-------
BlockManager
    Manager built from the per-column results with
    ``create_block_manager_from_arrays``, reusing ``self.axes``.
"""
array_op = ops.get_array_op(op)
if axis == 1:
# Pair each element of ``other`` with the column at the same position.
# NOTE(review): the number of columns visited is driven by ``other``;
# presumably the caller guarantees it matches ``self.items`` - confirm.
arrays = [
array_op(self.iget_values(i), _right) for i, _right in enumerate(other)
]
else:
# Combine the complete ``other`` with every column in turn.
arrays = [
array_op(self.iget_values(i), other) for i in range(len(self.items))
]

return create_block_manager_from_arrays(arrays, self.axes[0], self.axes)

def operate_manager(self, other: BlockManager, op) -> BlockManager:
    """
    Apply a binary operation between this manager and another manager.

    Parameters
    ----------
    other : BlockManager
        Right-hand operand, handed on to ``operate_blockwise``.
    op : callable
        Operator that ``ops.get_array_op`` resolves to an array-level function.

    Returns
    -------
    BlockManager
        The result of ``self.operate_blockwise`` for the resolved function.
    """
    # ``operate_blockwise`` expects the array-level function, not ``op`` itself.
    blockwise_func = ops.get_array_op(op)
    result = self.operate_blockwise(other, blockwise_func)
    return result

def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager:
"""
Apply array_op blockwise with another (aligned) BlockManager.
Expand Down