-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REGR: Fixed AssertionError in groupby #31616
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
f868874
e2fa8f5
70608cf
4da6bff
6eeda42
04d2c72
8a5db12
6eb1cfd
b4554be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1022,6 +1022,10 @@ def _cython_agg_blocks( | |
agg_blocks: List[Block] = [] | ||
new_items: List[np.ndarray] = [] | ||
deleted_items: List[np.ndarray] = [] | ||
# Some object-dtype blocks might be split into List[Block[T], Block[U]] | ||
split_items: List[np.ndarray] = [] | ||
split_frames: List[DataFrame] = [] | ||
|
||
no_result = object() | ||
for block in data.blocks: | ||
# Avoid inheriting result from earlier in the loop | ||
|
@@ -1061,40 +1065,56 @@ def _cython_agg_blocks( | |
else: | ||
result = cast(DataFrame, result) | ||
# unwrap DataFrame to get array | ||
if len(result._data.blocks) != 1: | ||
# We've split an object block! Everything we've assumed | ||
# about a single block input returning a single block output | ||
# is a lie. To keep the code-path for the typical non-split case | ||
# clean, we choose to clean up this mess later on. | ||
split_items.append(locs) | ||
split_frames.append(result) | ||
continue | ||
|
||
assert len(result._data.blocks) == 1 | ||
result = result._data.blocks[0].values | ||
if isinstance(result, np.ndarray) and result.ndim == 1: | ||
result = result.reshape(1, -1) | ||
|
||
finally: | ||
assert not isinstance(result, DataFrame) | ||
|
||
if result is not no_result: | ||
# see if we can cast the block back to the original dtype | ||
result = maybe_downcast_numeric(result, block.dtype) | ||
|
||
if block.is_extension and isinstance(result, np.ndarray): | ||
# e.g. block.values was an IntegerArray | ||
# (1, N) case can occur if block.values was Categorical | ||
# and result is ndarray[object] | ||
assert result.ndim == 1 or result.shape[0] == 1 | ||
try: | ||
# Cast back if feasible | ||
result = type(block.values)._from_sequence( | ||
result.ravel(), dtype=block.values.dtype | ||
) | ||
except ValueError: | ||
# reshape to be valid for non-Extension Block | ||
result = result.reshape(1, -1) | ||
assert not isinstance(result, DataFrame) | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if result is not no_result: | ||
# see if we can cast the block back to the original dtype | ||
result = maybe_downcast_numeric(result, block.dtype) | ||
|
||
if block.is_extension and isinstance(result, np.ndarray): | ||
# e.g. block.values was an IntegerArray | ||
# (1, N) case can occur if block.values was Categorical | ||
# and result is ndarray[object] | ||
assert result.ndim == 1 or result.shape[0] == 1 | ||
try: | ||
# Cast back if feasible | ||
result = type(block.values)._from_sequence( | ||
result.ravel(), dtype=block.values.dtype | ||
) | ||
except ValueError: | ||
# reshape to be valid for non-Extension Block | ||
result = result.reshape(1, -1) | ||
|
||
agg_block: Block = block.make_block(result) | ||
agg_block: Block = block.make_block(result) | ||
|
||
new_items.append(locs) | ||
agg_blocks.append(agg_block) | ||
|
||
if not agg_blocks: | ||
if not (agg_blocks or split_frames): | ||
raise DataError("No numeric types to aggregate") | ||
|
||
if split_items: | ||
# Clean up the mess left over from split blocks. | ||
for locs, result in zip(split_items, split_frames): | ||
assert len(locs) == result.shape[1] | ||
for i, loc in enumerate(locs): | ||
new_items.append(np.array([loc], dtype=locs.dtype)) | ||
agg_blocks.append(result.iloc[:, [i]]._data.blocks[0]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we avoid some of this by changing the `agg_blocks.append` to `agg_blocks.extend`, and constructing these separate blocks up in lines 1069-1076? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I tried that, but it didn't look promising, so I abandoned it. Several things work against that.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, this might be out of scope, but I think if we used `block.apply` it would handle both the `make_block` and the potential splitting.
||
|
||
# reset the locs in the blocks to correspond to our | ||
# current ordering | ||
indexer = np.concatenate(new_items) | ||
|
Uh oh!
There was an error while loading. Please reload this page.