@@ -1424,14 +1424,7 @@ def _python_agg_general(self, func, *args, **kwargs):
1424
1424
# if this function is invalid for this dtype, we will ignore it.
1425
1425
result = self .grouper .agg_series (obj , f )
1426
1426
except TypeError :
1427
- warnings .warn (
1428
- f"Dropping invalid columns in { type (self ).__name__ } .agg "
1429
- "is deprecated. In a future version, a TypeError will be raised. "
1430
- "Before calling .agg, select only columns which should be "
1431
- "valid for the aggregating function." ,
1432
- FutureWarning ,
1433
- stacklevel = 3 ,
1434
- )
1427
+ warn_dropping_nuisance_columns_deprecated (type (self ), "agg" )
1435
1428
continue
1436
1429
1437
1430
key = base .OutputKey (label = name , position = idx )
@@ -1502,10 +1495,52 @@ def _agg_py_fallback(
1502
1495
# test_groupby_duplicate_columns with object dtype values
1503
1496
return ensure_block_shape (res_values , ndim = ndim )
1504
1497
1498
@final
def _cython_agg_general(
    self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
):
    """
    Aggregate the grouped data block-wise with the grouper's cython operation.

    Each block of values is passed to ``self.grouper._cython_operation``.
    When that raises ``NotImplementedError`` the block is aggregated with
    ``self._agg_py_fallback`` using ``alt`` instead.

    Parameters
    ----------
    how : str
        Name of the aggregation, forwarded to ``_cython_operation`` and used
        in the error/warning messages.
    alt : Callable
        Fallback forwarded to ``_agg_py_fallback``.
    numeric_only : bool
        If True and the data is not 1-dimensional, only the result of
        ``get_numeric_data`` is aggregated. If True and the data is
        1-dimensional with a non-numeric dtype, ``NotImplementedError``
        is raised.
    min_count : int, default -1
        Forwarded to ``_cython_operation`` only; the python fallback does
        not receive it.

    Returns
    -------
    The object produced by ``self._wrap_agged_manager``. In the
    1-dimensional case its index is set to ``self.grouper.result_index``
    and it is passed through ``self._reindex_output`` first.

    Raises
    ------
    NotImplementedError
        If ``numeric_only`` is True for 1-dimensional, non-numeric data.
    """
    # Note: we never get here with how="ohlc" for DataFrameGroupBy;
    #  that goes through SeriesGroupBy

    data = self._get_data_to_aggregate()
    is_ser = data.ndim == 1

    if numeric_only:
        if is_ser and not is_numeric_dtype(self._selected_obj.dtype):
            # GH#41291 match Series behavior
            raise NotImplementedError(
                f"{type(self).__name__}.{how} does not implement numeric_only."
            )
        elif not is_ser:
            data = data.get_numeric_data(copy=False)

    def array_func(values: ArrayLike) -> ArrayLike:
        # Aggregate a single block; ``data`` is read at call time, i.e.
        # after the optional numeric-only filtering above.
        try:
            result = self.grouper._cython_operation(
                "aggregate", values, how, axis=data.ndim - 1, min_count=min_count
            )
        except NotImplementedError:
            # generally if we have numeric_only=False
            # and non-applicable functions
            # try to python agg
            # TODO: shouldn't min_count matter?
            result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)

        return result

    # TypeError -> we may have an exception in trying to aggregate
    #  continue and exclude the block
    new_mgr = data.grouped_reduce(array_func, ignore_failures=True)

    # A shorter result than input means some blocks were excluded above;
    # that silent dropping is deprecated, so warn about it.
    if not is_ser and len(new_mgr) < len(data):
        warn_dropping_nuisance_columns_deprecated(type(self), how)

    res = self._wrap_agged_manager(new_mgr)
    if is_ser:
        res.index = self.grouper.result_index
        return self._reindex_output(res)
    else:
        return res
1509
1544
1510
1545
def _cython_transform (
1511
1546
self , how : str , numeric_only : bool = True , axis : int = 0 , ** kwargs
@@ -1771,8 +1806,9 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
1771
1806
# _wrap_agged_manager() returns. GH 35028
1772
1807
with com .temp_setattr (self , "observed" , True ):
1773
1808
result = self ._wrap_agged_manager (new_mgr )
1774
- if result .ndim == 1 :
1775
- result .index = self .grouper .result_index
1809
+
1810
+ if result .ndim == 1 :
1811
+ result .index = self .grouper .result_index
1776
1812
1777
1813
return self ._reindex_output (result , fill_value = 0 )
1778
1814
@@ -2710,18 +2746,15 @@ def blk_func(values: ArrayLike) -> ArrayLike:
2710
2746
2711
2747
res_mgr = mgr .grouped_reduce (blk_func , ignore_failures = True )
2712
2748
if not is_ser and len (res_mgr .items ) != len (mgr .items ):
2713
- warnings .warn (
2714
- "Dropping invalid columns in "
2715
- f"{ type (self ).__name__ } .quantile is deprecated. "
2716
- "In a future version, a TypeError will be raised. "
2717
- "Before calling .quantile, select only columns which "
2718
- "should be valid for the function." ,
2719
- FutureWarning ,
2720
- stacklevel = find_stack_level (),
2721
- )
2749
+ warn_dropping_nuisance_columns_deprecated (type (self ), "quantile" )
2750
+
2722
2751
if len (res_mgr .items ) == 0 :
2723
2752
# re-call grouped_reduce to get the desired exception message
2724
2753
mgr .grouped_reduce (blk_func , ignore_failures = False )
2754
+ # grouped_reduce _should_ raise, so this should not be reached
2755
+ raise TypeError ( # pragma: no cover
2756
+ "All columns were dropped in grouped_reduce"
2757
+ )
2725
2758
2726
2759
if is_ser :
2727
2760
res = self ._wrap_agged_manager (res_mgr )
@@ -3154,30 +3187,20 @@ def blk_func(values: ArrayLike) -> ArrayLike:
3154
3187
3155
3188
if not is_ser and len (res_mgr .items ) != len (mgr .items ):
3156
3189
howstr = how .replace ("group_" , "" )
3157
- warnings .warn (
3158
- "Dropping invalid columns in "
3159
- f"{ type (self ).__name__ } .{ howstr } is deprecated. "
3160
- "In a future version, a TypeError will be raised. "
3161
- f"Before calling .{ howstr } , select only columns which "
3162
- "should be valid for the function." ,
3163
- FutureWarning ,
3164
- stacklevel = 3 ,
3165
- )
3190
+ warn_dropping_nuisance_columns_deprecated (type (self ), howstr )
3191
+
3166
3192
if len (res_mgr .items ) == 0 :
3167
3193
# We re-call grouped_reduce to get the right exception message
3168
- try :
3169
- mgr .grouped_reduce (blk_func , ignore_failures = False )
3170
- except Exception as err :
3171
- error_msg = str (err )
3172
- raise TypeError (error_msg )
3173
- # We should never get here
3174
- raise TypeError ("All columns were dropped in grouped_reduce" )
3194
+ mgr .grouped_reduce (blk_func , ignore_failures = False )
3195
+ # grouped_reduce _should_ raise, so this should not be reached
3196
+ raise TypeError ( # pragma: no cover
3197
+ "All columns were dropped in grouped_reduce"
3198
+ )
3175
3199
3176
3200
if is_ser :
3177
3201
out = self ._wrap_agged_manager (res_mgr )
3178
- out .index = self .grouper .result_index
3179
3202
else :
3180
- out = type ( obj ) (res_mgr )
3203
+ out = obj . _constructor (res_mgr )
3181
3204
3182
3205
return self ._wrap_aggregated_output (out )
3183
3206
@@ -3631,3 +3654,15 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
3631
3654
else :
3632
3655
mi = MultiIndex .from_product ([idx , qs ])
3633
3656
return mi
3657
+
3658
+
3659
def warn_dropping_nuisance_columns_deprecated(cls: type, how: str) -> None:
    """
    Emit the FutureWarning for deprecated dropping of invalid columns.

    Parameters
    ----------
    cls : type
        Class whose ``__name__`` is interpolated into the message.
    how : str
        Name of the method being called; interpolated into the message
        both as ``<cls name>.<how>`` and as ``.<how>``.
    """
    warnings.warn(
        "Dropping invalid columns in "
        f"{cls.__name__}.{how} is deprecated. "
        "In a future version, a TypeError will be raised. "
        f"Before calling .{how}, select only columns which "
        "should be valid for the function.",
        FutureWarning,
        # NOTE(review): find_stack_level is defined outside this view;
        # presumably it picks the first frame outside the library - confirm.
        stacklevel=find_stack_level(),
    )
0 commit comments