Skip to content

Commit b1801bd

Browse files
authored
REF: share _cython_agg_general (#43762)
1 parent d886f10 commit b1801bd

File tree

2 files changed, with 76 additions and 132 deletions.

2 files changed, with 76 additions and 132 deletions.

pandas/core/groupby/generic.py

Lines changed: 3 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,6 @@
4848
is_dict_like,
4949
is_integer_dtype,
5050
is_interval_dtype,
51-
is_numeric_dtype,
5251
is_scalar,
5352
)
5453
from pandas.core.dtypes.missing import (
@@ -78,6 +77,7 @@
7877
_apply_docs,
7978
_transform_template,
8079
group_selection_context,
80+
warn_dropping_nuisance_columns_deprecated,
8181
)
8282
from pandas.core.indexes.api import (
8383
Index,
@@ -334,43 +334,6 @@ def _aggregate_multiple_funcs(self, arg) -> DataFrame:
334334
output = self._reindex_output(output)
335335
return output
336336

337-
def _cython_agg_general(
338-
self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
339-
):
340-
341-
obj = self._selected_obj
342-
objvals = obj._values
343-
data = obj._mgr
344-
345-
if numeric_only and not is_numeric_dtype(obj.dtype):
346-
# GH#41291 match Series behavior
347-
raise NotImplementedError(
348-
f"{type(self).__name__}.{how} does not implement numeric_only."
349-
)
350-
351-
# This is overkill because it is only called once, but is here to
352-
# mirror the array_func used in DataFrameGroupBy._cython_agg_general
353-
def array_func(values: ArrayLike) -> ArrayLike:
354-
try:
355-
result = self.grouper._cython_operation(
356-
"aggregate", values, how, axis=data.ndim - 1, min_count=min_count
357-
)
358-
except NotImplementedError:
359-
# generally if we have numeric_only=False
360-
# and non-applicable functions
361-
# try to python agg
362-
# TODO: shouldn't min_count matter?
363-
result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
364-
365-
return result
366-
367-
result = array_func(objvals)
368-
369-
ser = self.obj._constructor(
370-
result, index=self.grouper.result_index, name=obj.name
371-
)
372-
return self._reindex_output(ser)
373-
374337
def _indexed_output_to_ndframe(
375338
self, output: Mapping[base.OutputKey, ArrayLike]
376339
) -> Series:
@@ -970,46 +933,6 @@ def _iterate_slices(self) -> Iterable[Series]:
970933

971934
yield values
972935

973-
def _cython_agg_general(
974-
self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
975-
) -> DataFrame:
976-
# Note: we never get here with how="ohlc"; that goes through SeriesGroupBy
977-
978-
data: Manager2D = self._get_data_to_aggregate()
979-
980-
if numeric_only:
981-
data = data.get_numeric_data(copy=False)
982-
983-
def array_func(values: ArrayLike) -> ArrayLike:
984-
try:
985-
result = self.grouper._cython_operation(
986-
"aggregate", values, how, axis=data.ndim - 1, min_count=min_count
987-
)
988-
except NotImplementedError:
989-
# generally if we have numeric_only=False
990-
# and non-applicable functions
991-
# try to python agg
992-
# TODO: shouldn't min_count matter?
993-
result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
994-
995-
return result
996-
997-
# TypeError -> we may have an exception in trying to aggregate
998-
# continue and exclude the block
999-
new_mgr = data.grouped_reduce(array_func, ignore_failures=True)
1000-
1001-
if len(new_mgr) < len(data):
1002-
warnings.warn(
1003-
f"Dropping invalid columns in {type(self).__name__}.{how} "
1004-
"is deprecated. In a future version, a TypeError will be raised. "
1005-
f"Before calling .{how}, select only columns which should be "
1006-
"valid for the function.",
1007-
FutureWarning,
1008-
stacklevel=4,
1009-
)
1010-
1011-
return self._wrap_agged_manager(new_mgr)
1012-
1013936
def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
1014937
if self.grouper.nkeys != 1:
1015938
raise AssertionError("Number of keys must be 1")
@@ -1195,14 +1118,7 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike:
11951118
res_mgr.set_axis(1, mgr.axes[1])
11961119

11971120
if len(res_mgr) < len(mgr):
1198-
warnings.warn(
1199-
f"Dropping invalid columns in {type(self).__name__}.{how} "
1200-
"is deprecated. In a future version, a TypeError will be raised. "
1201-
f"Before calling .{how}, select only columns which should be "
1202-
"valid for the transforming function.",
1203-
FutureWarning,
1204-
stacklevel=4,
1205-
)
1121+
warn_dropping_nuisance_columns_deprecated(type(self), how)
12061122

12071123
res_df = self.obj._constructor(res_mgr)
12081124
if self.axis == 1:
@@ -1314,14 +1230,7 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
13141230
output[i] = sgb.transform(wrapper)
13151231
except TypeError:
13161232
# e.g. trying to call nanmean with string values
1317-
warnings.warn(
1318-
f"Dropping invalid columns in {type(self).__name__}.transform "
1319-
"is deprecated. In a future version, a TypeError will be raised. "
1320-
"Before calling .transform, select only columns which should be "
1321-
"valid for the transforming function.",
1322-
FutureWarning,
1323-
stacklevel=5,
1324-
)
1233+
warn_dropping_nuisance_columns_deprecated(type(self), "transform")
13251234
else:
13261235
inds.append(i)
13271236

pandas/core/groupby/groupby.py

Lines changed: 73 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1424,14 +1424,7 @@ def _python_agg_general(self, func, *args, **kwargs):
14241424
# if this function is invalid for this dtype, we will ignore it.
14251425
result = self.grouper.agg_series(obj, f)
14261426
except TypeError:
1427-
warnings.warn(
1428-
f"Dropping invalid columns in {type(self).__name__}.agg "
1429-
"is deprecated. In a future version, a TypeError will be raised. "
1430-
"Before calling .agg, select only columns which should be "
1431-
"valid for the aggregating function.",
1432-
FutureWarning,
1433-
stacklevel=3,
1434-
)
1427+
warn_dropping_nuisance_columns_deprecated(type(self), "agg")
14351428
continue
14361429

14371430
key = base.OutputKey(label=name, position=idx)
@@ -1502,10 +1495,52 @@ def _agg_py_fallback(
15021495
# test_groupby_duplicate_columns with object dtype values
15031496
return ensure_block_shape(res_values, ndim=ndim)
15041497

1498+
@final
15051499
def _cython_agg_general(
15061500
self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
15071501
):
1508-
raise AbstractMethodError(self)
1502+
# Note: we never get here with how="ohlc" for DataFrameGroupBy;
1503+
# that goes through SeriesGroupBy
1504+
1505+
data = self._get_data_to_aggregate()
1506+
is_ser = data.ndim == 1
1507+
1508+
if numeric_only:
1509+
if is_ser and not is_numeric_dtype(self._selected_obj.dtype):
1510+
# GH#41291 match Series behavior
1511+
raise NotImplementedError(
1512+
f"{type(self).__name__}.{how} does not implement numeric_only."
1513+
)
1514+
elif not is_ser:
1515+
data = data.get_numeric_data(copy=False)
1516+
1517+
def array_func(values: ArrayLike) -> ArrayLike:
1518+
try:
1519+
result = self.grouper._cython_operation(
1520+
"aggregate", values, how, axis=data.ndim - 1, min_count=min_count
1521+
)
1522+
except NotImplementedError:
1523+
# generally if we have numeric_only=False
1524+
# and non-applicable functions
1525+
# try to python agg
1526+
# TODO: shouldn't min_count matter?
1527+
result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
1528+
1529+
return result
1530+
1531+
# TypeError -> we may have an exception in trying to aggregate
1532+
# continue and exclude the block
1533+
new_mgr = data.grouped_reduce(array_func, ignore_failures=True)
1534+
1535+
if not is_ser and len(new_mgr) < len(data):
1536+
warn_dropping_nuisance_columns_deprecated(type(self), how)
1537+
1538+
res = self._wrap_agged_manager(new_mgr)
1539+
if is_ser:
1540+
res.index = self.grouper.result_index
1541+
return self._reindex_output(res)
1542+
else:
1543+
return res
15091544

15101545
def _cython_transform(
15111546
self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs
@@ -1771,8 +1806,9 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
17711806
# _wrap_agged_manager() returns. GH 35028
17721807
with com.temp_setattr(self, "observed", True):
17731808
result = self._wrap_agged_manager(new_mgr)
1774-
if result.ndim == 1:
1775-
result.index = self.grouper.result_index
1809+
1810+
if result.ndim == 1:
1811+
result.index = self.grouper.result_index
17761812

17771813
return self._reindex_output(result, fill_value=0)
17781814

@@ -2710,18 +2746,15 @@ def blk_func(values: ArrayLike) -> ArrayLike:
27102746

27112747
res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True)
27122748
if not is_ser and len(res_mgr.items) != len(mgr.items):
2713-
warnings.warn(
2714-
"Dropping invalid columns in "
2715-
f"{type(self).__name__}.quantile is deprecated. "
2716-
"In a future version, a TypeError will be raised. "
2717-
"Before calling .quantile, select only columns which "
2718-
"should be valid for the function.",
2719-
FutureWarning,
2720-
stacklevel=find_stack_level(),
2721-
)
2749+
warn_dropping_nuisance_columns_deprecated(type(self), "quantile")
2750+
27222751
if len(res_mgr.items) == 0:
27232752
# re-call grouped_reduce to get the desired exception message
27242753
mgr.grouped_reduce(blk_func, ignore_failures=False)
2754+
# grouped_reduce _should_ raise, so this should not be reached
2755+
raise TypeError( # pragma: no cover
2756+
"All columns were dropped in grouped_reduce"
2757+
)
27252758

27262759
if is_ser:
27272760
res = self._wrap_agged_manager(res_mgr)
@@ -3154,30 +3187,20 @@ def blk_func(values: ArrayLike) -> ArrayLike:
31543187

31553188
if not is_ser and len(res_mgr.items) != len(mgr.items):
31563189
howstr = how.replace("group_", "")
3157-
warnings.warn(
3158-
"Dropping invalid columns in "
3159-
f"{type(self).__name__}.{howstr} is deprecated. "
3160-
"In a future version, a TypeError will be raised. "
3161-
f"Before calling .{howstr}, select only columns which "
3162-
"should be valid for the function.",
3163-
FutureWarning,
3164-
stacklevel=3,
3165-
)
3190+
warn_dropping_nuisance_columns_deprecated(type(self), howstr)
3191+
31663192
if len(res_mgr.items) == 0:
31673193
# We re-call grouped_reduce to get the right exception message
3168-
try:
3169-
mgr.grouped_reduce(blk_func, ignore_failures=False)
3170-
except Exception as err:
3171-
error_msg = str(err)
3172-
raise TypeError(error_msg)
3173-
# We should never get here
3174-
raise TypeError("All columns were dropped in grouped_reduce")
3194+
mgr.grouped_reduce(blk_func, ignore_failures=False)
3195+
# grouped_reduce _should_ raise, so this should not be reached
3196+
raise TypeError( # pragma: no cover
3197+
"All columns were dropped in grouped_reduce"
3198+
)
31753199

31763200
if is_ser:
31773201
out = self._wrap_agged_manager(res_mgr)
3178-
out.index = self.grouper.result_index
31793202
else:
3180-
out = type(obj)(res_mgr)
3203+
out = obj._constructor(res_mgr)
31813204

31823205
return self._wrap_aggregated_output(out)
31833206

@@ -3631,3 +3654,15 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
36313654
else:
36323655
mi = MultiIndex.from_product([idx, qs])
36333656
return mi
3657+
3658+
3659+
def warn_dropping_nuisance_columns_deprecated(cls, how: str) -> None:
3660+
warnings.warn(
3661+
"Dropping invalid columns in "
3662+
f"{cls.__name__}.{how} is deprecated. "
3663+
"In a future version, a TypeError will be raised. "
3664+
f"Before calling .{how}, select only columns which "
3665+
"should be valid for the function.",
3666+
FutureWarning,
3667+
stacklevel=find_stack_level(),
3668+
)

0 commit comments

Comments
 (0)