69
69
validate_func_kwargs ,
70
70
)
71
71
from pandas .core .apply import GroupByApply
72
- from pandas .core .arrays import Categorical
73
72
from pandas .core .base import (
74
73
DataError ,
75
74
SpecificationError ,
84
83
_agg_template ,
85
84
_apply_docs ,
86
85
_transform_template ,
87
- get_groupby ,
88
86
group_selection_context ,
89
87
)
90
88
from pandas .core .indexes .api import (
@@ -353,6 +351,7 @@ def _cython_agg_general(
353
351
354
352
obj = self ._selected_obj
355
353
objvals = obj ._values
354
+ data = obj ._mgr
356
355
357
356
if numeric_only and not is_numeric_dtype (obj .dtype ):
358
357
raise DataError ("No numeric types to aggregate" )
@@ -362,28 +361,15 @@ def _cython_agg_general(
362
361
def array_func (values : ArrayLike ) -> ArrayLike :
363
362
try :
364
363
result = self .grouper ._cython_operation (
365
- "aggregate" , values , how , axis = 0 , min_count = min_count
364
+ "aggregate" , values , how , axis = data . ndim - 1 , min_count = min_count
366
365
)
367
366
except NotImplementedError :
368
- ser = Series (values ) # equiv 'obj' from outer frame
369
- if self .ngroups > 0 :
370
- res_values , _ = self .grouper .agg_series (ser , alt )
371
- else :
372
- # equiv: res_values = self._python_agg_general(alt)
373
- # error: Incompatible types in assignment (expression has
374
- # type "Union[DataFrame, Series]", variable has type
375
- # "Union[ExtensionArray, ndarray]")
376
- res_values = self ._python_apply_general ( # type: ignore[assignment]
377
- alt , ser
378
- )
367
+ # generally if we have numeric_only=False
368
+ # and non-applicable functions
369
+ # try to python agg
370
+ # TODO: shouldn't min_count matter?
371
+ result = self ._agg_py_fallback (values , ndim = data .ndim , alt = alt )
379
372
380
- if isinstance (values , Categorical ):
381
- # Because we only get here with known dtype-preserving
382
- # reductions, we cast back to Categorical.
383
- # TODO: if we ever get "rank" working, exclude it here.
384
- result = type (values )._from_sequence (res_values , dtype = values .dtype )
385
- else :
386
- result = res_values
387
373
return result
388
374
389
375
result = array_func (objvals )
@@ -1116,72 +1102,17 @@ def _cython_agg_general(
1116
1102
if numeric_only :
1117
1103
data = data .get_numeric_data (copy = False )
1118
1104
1119
- def cast_agg_result (result : ArrayLike , values : ArrayLike ) -> ArrayLike :
1120
- # see if we can cast the values to the desired dtype
1121
- # this may not be the original dtype
1122
-
1123
- if isinstance (result .dtype , np .dtype ) and result .ndim == 1 :
1124
- # We went through a SeriesGroupByPath and need to reshape
1125
- # GH#32223 includes case with IntegerArray values
1126
- # We only get here with values.dtype == object
1127
- result = result .reshape (1 , - 1 )
1128
- # test_groupby_duplicate_columns gets here with
1129
- # result.dtype == int64, values.dtype=object, how="min"
1130
-
1131
- return result
1132
-
1133
- def py_fallback (values : ArrayLike ) -> ArrayLike :
1134
- # if self.grouper.aggregate fails, we fall back to a pure-python
1135
- # solution
1136
-
1137
- # We get here with a) EADtypes and b) object dtype
1138
- obj : FrameOrSeriesUnion
1139
-
1140
- # call our grouper again with only this block
1141
- if values .ndim == 1 :
1142
- # We only get here with ExtensionArray
1143
-
1144
- obj = Series (values )
1145
- else :
1146
- # We only get here with values.dtype == object
1147
- # TODO special case not needed with ArrayManager
1148
- df = DataFrame (values .T )
1149
- # bc we split object blocks in grouped_reduce, we have only 1 col
1150
- # otherwise we'd have to worry about block-splitting GH#39329
1151
- assert df .shape [1 ] == 1
1152
- # Avoid call to self.values that can occur in DataFrame
1153
- # reductions; see GH#28949
1154
- obj = df .iloc [:, 0 ]
1155
-
1156
- # Create SeriesGroupBy with observed=True so that it does
1157
- # not try to add missing categories if grouping over multiple
1158
- # Categoricals. This will done by later self._reindex_output()
1159
- # Doing it here creates an error. See GH#34951
1160
- sgb = get_groupby (obj , self .grouper , observed = True )
1161
-
1162
- # Note: bc obj is always a Series here, we can ignore axis and pass
1163
- # `alt` directly instead of `lambda x: alt(x, axis=self.axis)`
1164
- # use _agg_general bc it will go through _cython_agg_general
1165
- # which will correctly cast Categoricals.
1166
- res_ser = sgb ._agg_general (
1167
- numeric_only = False , min_count = min_count , alias = how , npfunc = alt
1168
- )
1169
-
1170
- # unwrap Series to get array
1171
- res_values = res_ser ._mgr .arrays [0 ]
1172
- return cast_agg_result (res_values , values )
1173
-
1174
1105
def array_func (values : ArrayLike ) -> ArrayLike :
1175
-
1176
1106
try :
1177
1107
result = self .grouper ._cython_operation (
1178
- "aggregate" , values , how , axis = 1 , min_count = min_count
1108
+ "aggregate" , values , how , axis = data . ndim - 1 , min_count = min_count
1179
1109
)
1180
1110
except NotImplementedError :
1181
1111
# generally if we have numeric_only=False
1182
1112
# and non-applicable functions
1183
1113
# try to python agg
1184
- result = py_fallback (values )
1114
+ # TODO: shouldn't min_count matter?
1115
+ result = self ._agg_py_fallback (values , ndim = data .ndim , alt = alt )
1185
1116
1186
1117
return result
1187
1118
0 commit comments