REF: don't special-case ngroups==0 (#41331)

jbrockmendel · web-flow · commit a6db65266f38 · 2021-05-06T19:22:16.000-04:00
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
@@ -105,10 +105,11 @@ cdef class SeriesBinGrouper(_BaseGrouper):
         Py_ssize_t nresults, ngroups
 
     cdef public:
+        ndarray bins  # ndarray[int64_t]
         ndarray arr, index, dummy_arr, dummy_index
-        object values, f, bins, typ, ityp, name, idtype
+        object values, f, typ, ityp, name, idtype
 
-    def __init__(self, object series, object f, object bins):
+    def __init__(self, object series, object f, ndarray[int64_t] bins):
 
         assert len(bins) > 0  # otherwise we get IndexError in get_result
 
@@ -133,6 +134,8 @@ cdef class SeriesBinGrouper(_BaseGrouper):
         if len(bins) > 0 and bins[-1] == len(series):
             self.ngroups = len(bins)
         else:
+            # TODO: only reached when test_series_bin_grouper constructs a
+            #  SeriesBinGrouper directly; can we rule this case out?
             self.ngroups = len(bins) + 1
 
     def get_result(self):
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1331,18 +1331,7 @@ def _agg_py_fallback(
             #  reductions; see GH#28949
             ser = df.iloc[:, 0]
 
-        # Create SeriesGroupBy with observed=True so that it does
-        # not try to add missing categories if grouping over multiple
-        # Categoricals. This will done by later self._reindex_output()
-        # Doing it here creates an error. See GH#34951
-        sgb = get_groupby(ser, self.grouper, observed=True)
-        # For SeriesGroupBy we could just use self instead of sgb
-
-        if self.ngroups > 0:
-            res_values = self.grouper.agg_series(ser, alt)
-        else:
-            # equiv: res_values = self._python_agg_general(alt)
-            res_values = sgb._python_apply_general(alt, ser)._values
+        res_values = self.grouper.agg_series(ser, alt)
 
         if isinstance(values, Categorical):
             # Because we only get here with known dtype-preserving
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -968,8 +968,8 @@ def _cython_operation(
 
     @final
     def agg_series(self, obj: Series, func: F) -> ArrayLike:
-        # Caller is responsible for checking ngroups != 0
-        assert self.ngroups != 0
+        # test_groupby_empty_with_category gets here with self.ngroups == 0
+        #  and len(obj) > 0
 
         cast_back = True
         if len(obj) == 0:
@@ -1006,7 +1006,6 @@ def _aggregate_series_fast(self, obj: Series, func: F) -> np.ndarray:
         #  - obj.index is not a MultiIndex
         #  - obj is backed by an ndarray, not ExtensionArray
         #  - len(obj) > 0
-        #  - ngroups != 0
         func = com.is_builtin_func(func)
 
         ids, _, ngroups = self.group_info
diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
@@ -56,7 +56,7 @@ def test_series_grouper_requires_nonempty_raises():
 def test_series_bin_grouper():
     obj = Series(np.random.randn(10))
 
-    bins = np.array([3, 6])
+    bins = np.array([3, 6], dtype=np.int64)
 
     grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins)
     result, counts = grouper.get_result()