-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
PERF: block-wise arithmetic for frame-with-frame #32779
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 19 commits
1697252
a7764d6
30a836d
3559698
4334353
cb40b0c
713a776
95ef3ad
61e5cd6
89c3d7b
e348e46
519c757
2b1ba18
53e93fc
91c86a3
2034084
8aedf35
0c12d35
9727562
6661dd3
42bbbf3
65ab023
0d958a3
7f91e74
56eef51
4baea6f
41a4e7a
b23144e
7f24d57
ae744b7
b14a98c
f42c403
8a2807e
fa046f0
fd10fb6
7ea5d3a
7150e87
bddfbb0
25f83d6
2142d29
0ca2125
1ea0cc0
2bfc308
d5ad2a0
108004b
7ca5f9a
e78570d
33dfbdf
30f655b
65a4eaf
30d6580
fe21f9c
f86deb4
f3dc465
b57f52c
0c46531
463a145
32e70d8
7989251
455e45e
41e8e78
ac8eea8
8c4f951
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
This is not a public API. | ||
""" | ||
import operator | ||
from typing import TYPE_CHECKING, Optional, Set | ||
from typing import TYPE_CHECKING, List, Optional, Set, Tuple | ||
|
||
import numpy as np | ||
|
||
|
@@ -57,6 +57,7 @@ | |
|
||
if TYPE_CHECKING: | ||
from pandas import DataFrame # noqa:F401 | ||
from pandas.core.internals.blocks import Block # noqa: F401 | ||
|
||
# ----------------------------------------------------------------------------- | ||
# constants | ||
|
@@ -293,6 +294,85 @@ def fill_binop(left, right, fill_value): | |
# Dispatch logic | ||
|
||
|
||
def operate_blockwise(left, right, array_op): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok much nicer / readable that before. I would consider moving this to a new module to de-nest these functions for blockwise operations (e.g. pandas/core/ops/blockwise.py), but can be later. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think if you move this to ops/blockwise.py would be a +1 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would really like to put this in a new file |
||
assert right._indexed_same(left) | ||
|
||
def get_same_shape_values( | ||
lblk: "Block", rblk: "Block", left_ea: bool, right_ea: bool | ||
) -> Tuple[ArrayLike, ArrayLike]: | ||
""" | ||
Slice lblk.values to align with rblk. Squeeze if we have EAs. | ||
""" | ||
lvals = lblk.values | ||
rvals = rblk.values | ||
|
||
# TODO(EA2D): with 2D EAs pnly this first clause would be needed | ||
if not (left_ea or right_ea): | ||
lvals = lvals[rblk.mgr_locs.indexer, :] | ||
assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) | ||
elif left_ea and right_ea: | ||
assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape) | ||
elif right_ea: | ||
# lvals are 2D, rvals are 1D | ||
lvals = lvals[rblk.mgr_locs.indexer, :] | ||
assert lvals.shape[0] == 1, lvals.shape | ||
lvals = lvals[0, :] | ||
else: | ||
# lvals are 1D, rvals are 2D | ||
assert rvals.shape[0] == 1, rvals.shape | ||
rvals = rvals[0, :] | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
return lvals, rvals | ||
|
||
res_blks: List["Block"] = [] | ||
rmgr = right._data | ||
for n, blk in enumerate(left._data.blocks): | ||
locs = blk.mgr_locs | ||
blk_vals = blk.values | ||
|
||
left_ea = not isinstance(blk_vals, np.ndarray) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add some comments here.(general + line comments as needed) |
||
rblks = rmgr._slice_take_blocks_ax0(locs.indexer) | ||
|
||
if left_ea: | ||
assert len(locs) == 1, locs | ||
assert len(rblks) == 1, rblks | ||
assert rblks[0].shape[0] == 1, rblks[0].shape | ||
|
||
for k, rblk in enumerate(rblks): | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
right_ea = not isinstance(rblk.values, np.ndarray) | ||
|
||
lvals, rvals = get_same_shape_values(blk, rblk, left_ea, right_ea) | ||
|
||
res_values = array_op(lvals, rvals) | ||
nbs = rblk._split_op_result(res_values) | ||
|
||
if right_ea or left_ea: | ||
assert len(nbs) == 1 | ||
else: | ||
assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape) | ||
|
||
for nb in nbs: | ||
# Reset mgr_locs to correspond to our original DataFrame | ||
nblocs = locs.as_array[nb.mgr_locs.indexer] | ||
nb.mgr_locs = nblocs | ||
# Assertions are disabled for performance, but should hold: | ||
# assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape) | ||
# assert all(x in locs.as_array for x in nb.mgr_locs.as_array) | ||
|
||
res_blks.extend(nbs) | ||
|
||
# Assertions are disabled for performance, but should hold: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. they really affect a lot? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no, but since joris is running benchmarks... though now that this uses the new DataFrame constructor fastpath, i think the timings become unambiguous. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
can you show your benchmarks one more time |
||
# slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array} | ||
# nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks) | ||
# assert nlocs == len(left.columns), (nlocs, len(left.columns)) | ||
# assert len(slocs) == nlocs, (len(slocs), nlocs) | ||
# assert slocs == set(range(nlocs)), slocs | ||
|
||
new_mgr = type(rmgr)(res_blks, axes=rmgr.axes, do_integrity_check=False) | ||
return new_mgr | ||
|
||
|
||
def dispatch_to_series(left, right, func, str_rep=None, axis=None): | ||
""" | ||
Evaluate the frame operation func(left, right) by evaluating | ||
|
@@ -325,8 +405,9 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): | |
elif isinstance(right, ABCDataFrame): | ||
assert right._indexed_same(left) | ||
|
||
def column_op(a, b): | ||
return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. An alternative would be to improve the performance of this line, and avoid the complexity that is added in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. regardless this is worth doing on L343/424 and L348/429 |
||
array_op = get_array_op(func, str_rep=str_rep) | ||
bm = operate_blockwise(left, right, array_op) | ||
return type(left)(bm) | ||
|
||
elif isinstance(right, ABCSeries) and axis == "columns": | ||
# We only get here if called via _combine_series_frame, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -964,7 +964,9 @@ def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture): | |
obj = tm.box_expected(dti, box_with_array) | ||
expected = tm.box_expected(expected, box_with_array) | ||
|
||
warn = PerformanceWarning if box_with_array is not pd.DataFrame else None | ||
warn = None | ||
if box_with_array is not pd.DataFrame or tz_naive_fixture is None: | ||
warn = PerformanceWarning | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you know why this changed? We do / do not raise a warning on the array-level? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't do a warning when operating |
||
with tm.assert_produces_warning(warn): | ||
result = obj - obj.astype(object) | ||
tm.assert_equal(result, expected) | ||
|
@@ -1388,8 +1390,7 @@ def test_dt64arr_add_mixed_offset_array(self, box_with_array): | |
s = DatetimeIndex([Timestamp("2000-1-1"), Timestamp("2000-2-1")]) | ||
s = tm.box_expected(s, box_with_array) | ||
|
||
warn = None if box_with_array is pd.DataFrame else PerformanceWarning | ||
with tm.assert_produces_warning(warn): | ||
with tm.assert_produces_warning(PerformanceWarning): | ||
other = pd.Index([pd.offsets.DateOffset(years=1), pd.offsets.MonthEnd()]) | ||
other = tm.box_expected(other, box_with_array) | ||
result = s + other | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it needed to test it with all those different ops?
I think what we are interested in catching with the benchmark, is the general code handling wide dataframes (how the dispatching to the actual op is done, dispathing to block/column instead of series, etc), not the actual op right? So for those aspects, they all use the same code, and testing all ops seems overkill? (it just adds to the number of benchmarks needlessly, making it harder to run the benchmark suite / interpret the results)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No strong opinion here.
Yah, this is a hassle. Best guess is eventually we're going to land on a "throw hardware at the problem" solution, but that doesn't help the local-running troubles.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then let's keep only 2 of them or so, I would say
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
how about four: one comparison, one logical, floordiv (for which we have special handling), and one other arithmetic