Skip to content

REF: move actual lookup and dispatch to array_op from frame into internals #39772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
40 changes: 11 additions & 29 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6091,14 +6091,10 @@ def _dispatch_frame_op(self, right, func, axis: Optional[int] = None):
-------
DataFrame
"""
# Get the appropriate array-op to apply to each column/block's values.
array_op = ops.get_array_op(func)

right = lib.item_from_zerodim(right)
if not is_list_like(right):
# i.e. scalar, faster than checking np.ndim(right) == 0
bm = self._mgr.apply(array_op, right=right)
return type(self)(bm)
bm = self._mgr.operate_scalar(right, func)

elif isinstance(right, DataFrame):
assert self.index.equals(right.index)
Expand All @@ -6107,38 +6103,24 @@ def _dispatch_frame_op(self, right, func, axis: Optional[int] = None):
# fails in cases with empty columns reached via
# _frame_arith_method_with_reindex

# TODO operate_blockwise expects a manager of the same type
bm = self._mgr.operate_blockwise(
right._mgr, array_op # type: ignore[arg-type]
)
return type(self)(bm)

elif isinstance(right, Series) and axis == 1:
# axis=1 means we want to operate row-by-row
assert right.index.equals(self.columns)

right = right._values
# maybe_align_as_frame ensures we do not have an ndarray here
assert not isinstance(right, np.ndarray)

arrays = [
array_op(_left, _right)
for _left, _right in zip(self._iter_column_arrays(), right)
]
# TODO operate_manager expects a manager of the same type
bm = self._mgr.operate_manager(right._mgr, func) # type: ignore[arg-type]

elif isinstance(right, Series):
assert right.index.equals(self.index) # Handle other cases later
right = right._values
if axis == 1:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assert right.index.equals(self.get_axis(axis))

# axis=1 means we want to operate row-by-row
assert right.index.equals(self.columns)
else:
assert right.index.equals(self.index)

arrays = [array_op(left, right) for left in self._iter_column_arrays()]
right = right._values
bm = self._mgr.operate_array(right, func, axis)

else:
# Remaining cases have less-obvious dispatch rules
raise NotImplementedError(right)

return type(self)._from_arrays(
arrays, self.columns, self.index, verify_integrity=False
)
return type(self)(bm)

def _combine_frame(self, other: DataFrame, func, fill_value=None):
# at this point we have `self._indexed_same(other)`
Expand Down
69 changes: 66 additions & 3 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np

from pandas._libs import algos as libalgos, lib
from pandas._typing import ArrayLike, DtypeObj, Hashable
from pandas._typing import ArrayLike, DtypeObj, Hashable, Scalar
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import find_common_type, infer_dtype_from_scalar
Expand All @@ -22,6 +22,7 @@
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
from pandas.core.dtypes.missing import array_equals, isna

from pandas.core import ops
import pandas.core.algorithms as algos
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.sparse import SparseDtype
Expand Down Expand Up @@ -187,11 +188,73 @@ def reduce(
indexer = np.arange(self.shape[0])
return new_mgr, indexer

def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
def operate_scalar(self, other: Scalar, op) -> ArrayManager:
    """
    Element-wise (arithmetic/comparison/logical) operation with a scalar.

    Every array stored in ``self.arrays`` is combined with ``other``
    through the array-level operation that ``ops.get_array_op`` returns
    for ``op``.

    Parameters
    ----------
    other : scalar
        Right-hand operand, handed unchanged to each per-array call.
    op : operator function (eg ``operator.add``)

    Returns
    -------
    ArrayManager
        New manager of the same type as ``self``, built from the result
        arrays and ``self._axes``.
    """
    # Resolve the operator once; the same callable is reused for every array.
    column_op = ops.get_array_op(op)
    results = []
    for values in self.arrays:
        results.append(column_op(values, other))
    return type(self)(results, self._axes)

def operate_array(self, other: ArrayLike, op, axis: int) -> ArrayManager:
    """
    Element-wise (arithmetic/comparison/logical) operation with an array.

    The caller is expected to have checked that ``other`` has the correct
    length; no length validation happens here.

    Parameters
    ----------
    other : np.ndarray or ExtensionArray
    op : operator function (eg ``operator.add``)
    axis : int
        With ``axis=1`` the array is matched on the columns: the i-th
        stored array is combined with the i-th element of ``other``.
        With any other value the array is matched on the index: every
        stored array is combined with the full ``other``.

    Returns
    -------
    ArrayManager
        New manager of the same type as ``self``, built from the result
        arrays and ``self._axes``.
    """
    column_op = ops.get_array_op(op)
    if axis == 1:
        # One element of `other` per stored array.
        operand_pairs = zip(self.arrays, other)
    else:
        # The complete `other` array is reused for every stored array.
        operand_pairs = ((values, other) for values in self.arrays)
    results = [column_op(values, operand) for values, operand in operand_pairs]
    return type(self)(results, self._axes)

def operate_manager(self, other: ArrayManager, op) -> ArrayManager:
"""
Element-wise (arithmetic/comparison/logical) operation with other ArrayManager.

The other ArrayManager is already aligned with `self`.

Parameters
----------
other : ArrayManager
op : operator function (eg ``operator.add``)

Returns
-------
ArrayManager
"""
# TODO what if `other` is BlockManager ?
array_op = ops.get_array_op(op)
left_arrays = self.arrays
right_arrays = other.arrays
result_arrays = [
Expand Down
70 changes: 67 additions & 3 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import numpy as np

from pandas._libs import internals as libinternals, lib
from pandas._typing import ArrayLike, Dtype, DtypeObj, Shape
from pandas._typing import ArrayLike, Dtype, DtypeObj, Scalar, Shape
from pandas.errors import PerformanceWarning
from pandas.util._validators import validate_bool_kwarg

Expand All @@ -35,6 +35,7 @@
from pandas.core.dtypes.generic import ABCDataFrame, ABCPandasArray, ABCSeries
from pandas.core.dtypes.missing import array_equals, isna

from pandas.core import ops
import pandas.core.algorithms as algos
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.construction import extract_array
Expand Down Expand Up @@ -366,10 +367,73 @@ def reduce(
new_mgr = type(self).from_blocks(res_blocks, [self.items, index])
return new_mgr, indexer

def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager:
def operate_scalar(self, other: Scalar, op) -> BlockManager:
    """
    Element-wise (arithmetic/comparison/logical) operation with a scalar.

    Parameters
    ----------
    other : scalar
        Forwarded to ``self.apply`` as the ``right`` keyword argument.
    op : operator function (eg ``operator.add``)
        Converted to an array-level operation with ``ops.get_array_op``.

    Returns
    -------
    BlockManager
        Whatever ``self.apply`` returns for the array-level operation.
    """
    return self.apply(ops.get_array_op(op), right=other)

def operate_array(self, other: ArrayLike, op, axis: int) -> BlockManager:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: I currently add three specialized methods (operate_scalar, operate_array and operate_manager) for the three cases that are left to handle here.
However, this could also be a single operate method that combines the three with some if/elif/else checks (although those checks would partly duplicate the checks already done in _dispatch_frame_op, where these methods get called).

"""
Element-wise (arithmetic/comparison/logical) operation with other array.

The array is already checked to be of the correct length.

Parameters
----------
other : np.ndarray or ExtensionArray
op : operator function (eg ``operator.add``)
axis : int
Whether to match the array on the index and broadcast along the
columns (axis=0) or match the array on the columns and broadcast
along the rows (axis=1).

Returns
-------
BlockManager
"""
array_op = ops.get_array_op(op)
if axis == 1:
# match on the columns -> operate on each column array with single
# element from other array
arrays = [
array_op(self.iget_values(i), _right) for i, _right in enumerate(other)
]
else:
# match on the rows -> operate for each column array with full other array
arrays = [
array_op(self.iget_values(i), other) for i in range(len(self.items))
]

return create_block_manager_from_arrays(arrays, self.axes[0], self.axes)

def operate_manager(self, other: BlockManager, op) -> BlockManager:
    """
    Element-wise (arithmetic/comparison/logical) operation with another manager.

    ``other`` is expected to be aligned with ``self`` already; this method
    performs no alignment of its own.

    Parameters
    ----------
    other : BlockManager
    op : operator function (eg ``operator.add``)
        Converted to an array-level operation with ``ops.get_array_op``.

    Returns
    -------
    BlockManager
        Result of ``operate_blockwise`` for the two managers.
    """
    return operate_blockwise(self, other, ops.get_array_op(op))

def apply(
Expand Down