CLN: Assorted #51136

Merged · 8 commits · Feb 7, 2023
1 change: 1 addition & 0 deletions pandas/_libs/groupby.pyi
@@ -55,6 +55,7 @@ def group_any_all(
mask: np.ndarray, # const uint8_t[::1]
val_test: Literal["any", "all"],
skipna: bool,
nullable: bool,
) -> None: ...
def group_sum(
out: np.ndarray, # complexfloatingintuint_t[:, ::1]
2 changes: 1 addition & 1 deletion pandas/_libs/parsers.pyx
@@ -2042,7 +2042,7 @@ def _compute_na_values():
np.uint16: uint16info.max,
np.uint8: uint8info.max,
np.bool_: uint8info.max,
np.object_: np.nan # oof
np.object_: np.nan,
}
return na_values

3 changes: 2 additions & 1 deletion pandas/_testing/compat.py
@@ -1,11 +1,12 @@
"""
Helpers for sharing tests between DataFrame/Series
"""
from pandas._typing import DtypeObj

from pandas import DataFrame


def get_dtype(obj):
def get_dtype(obj) -> DtypeObj:
if isinstance(obj, DataFrame):
# Note: we are assuming only one column
return obj.dtypes.iat[0]
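For context, a minimal sketch of what the newly annotated helper returns, assuming a checkout where pandas._testing.compat.get_dtype is importable:

    import pandas as pd
    from pandas._testing.compat import get_dtype

    ser = pd.Series([1, 2, 3], dtype="int64")
    df = ser.to_frame(name="x")

    # For a Series the helper returns ser.dtype; for a DataFrame it assumes a
    # single column and returns that column's dtype, so both are a DtypeObj.
    assert get_dtype(ser) == get_dtype(df)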
8 changes: 5 additions & 3 deletions pandas/conftest.py
@@ -17,6 +17,7 @@
- Dtypes
- Misc
"""
from __future__ import annotations

from collections import abc
from datetime import (
@@ -31,6 +32,7 @@
import os
from typing import (
Callable,
Hashable,
Iterator,
)

@@ -1881,7 +1883,7 @@ def __init__(self, **kwargs) -> None:
(pd.NA, pd.NA, pd.NA),
]
)
def names(request):
def names(request) -> tuple[Hashable, Hashable, Hashable]:
"""
A 3-tuple of names, the first two for operands, the last for a result.
"""
@@ -1937,7 +1939,7 @@ def indexer_ial(request):


@pytest.fixture
def using_array_manager():
def using_array_manager() -> bool:
"""
Fixture to check if the array manager is being used.
"""
@@ -1958,7 +1960,7 @@ def using_copy_on_write() -> bool:


@pytest.fixture(params=warsaws)
def warsaw(request):
def warsaw(request) -> str:
"""
tzinfo for Europe/Warsaw using pytz, dateutil, or zoneinfo.
"""
11 changes: 9 additions & 2 deletions pandas/core/arrays/arrow/dtype.py
@@ -4,7 +4,11 @@

import numpy as np

from pandas._typing import DtypeObj
from pandas._typing import (
TYPE_CHECKING,
DtypeObj,
type_t,
)
from pandas.compat import pa_version_under7p0
from pandas.util._decorators import cache_readonly

@@ -16,6 +20,9 @@
if not pa_version_under7p0:
import pyarrow as pa

if TYPE_CHECKING:
from pandas.core.arrays.arrow import ArrowExtensionArray


@register_extension_dtype
class ArrowDtype(StorageExtensionDtype):
@@ -113,7 +120,7 @@ def itemsize(self) -> int:
return self.numpy_dtype.itemsize

@classmethod
def construct_array_type(cls):
def construct_array_type(cls) -> type_t[ArrowExtensionArray]:
"""
Return the array type associated with this dtype.

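A small sketch of the annotated classmethod, assuming pyarrow >= 7 is installed:

    import pyarrow as pa
    import pandas as pd
    from pandas.core.arrays.arrow import ArrowExtensionArray

    dtype = pd.ArrowDtype(pa.int64())
    # construct_array_type returns the array class itself, which is what the
    # type_t[ArrowExtensionArray] annotation expresses.
    assert dtype.construct_array_type() is ArrowExtensionArray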
71 changes: 10 additions & 61 deletions pandas/core/groupby/generic.py
@@ -66,10 +66,7 @@
notna,
)

from pandas.core import (
algorithms,
nanops,
)
from pandas.core import algorithms
from pandas.core.apply import (
GroupByApply,
maybe_mangle_lambdas,
@@ -98,6 +95,7 @@
from pandas.plotting import boxplot_frame_groupby

if TYPE_CHECKING:
from pandas import Categorical
from pandas.core.generic import NDFrame

# TODO(typing) the return value on this callable should be any *scalar*.
@@ -138,29 +136,6 @@ class NamedAgg(NamedTuple):
aggfunc: AggScalar


def generate_property(name: str, klass: type[DataFrame | Series]):
"""
Create a property for a GroupBy subclass to dispatch to DataFrame/Series.

Parameters
----------
name : str
klass : {DataFrame, Series}

Returns
-------
property
"""

def prop(self):
return self._make_wrapper(name)

parent_method = getattr(klass, name)
prop.__doc__ = parent_method.__doc__ or ""
prop.__name__ = name
return property(prop)


class SeriesGroupBy(GroupBy[Series]):
def _wrap_agged_manager(self, mgr: Manager) -> Series:
return self.obj._constructor(mgr, name=self.obj.name)
@@ -718,18 +693,13 @@ def value_counts(
else:

# lab is a Categorical with categories an IntervalIndex
lab = cut(Series(val), bins, include_lowest=True)
# error: "ndarray" has no attribute "cat"
lev = lab.cat.categories # type: ignore[attr-defined]
# error: No overload variant of "take" of "_ArrayOrScalarCommon" matches
# argument types "Any", "bool", "Union[Any, float]"
lab = lev.take( # type: ignore[call-overload]
# error: "ndarray" has no attribute "cat"
lab.cat.codes, # type: ignore[attr-defined]
cat_ser = cut(Series(val), bins, include_lowest=True)
cat_obj = cast("Categorical", cat_ser._values)
lev = cat_obj.categories
lab = lev.take(
cat_obj.codes,
allow_fill=True,
# error: Item "ndarray" of "Union[ndarray, Index]" has no attribute
# "_na_value"
fill_value=lev._na_value, # type: ignore[union-attr]
fill_value=lev._na_value,
)
llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]
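The rewrite leans on cut returning a Series backed by a Categorical, so casting to Categorical makes the .categories/.codes access explicit instead of silencing mypy. A rough standalone sketch of the same pattern, with made-up values:

    import numpy as np
    from pandas import Series, cut

    val = np.array([1.0, 2.0, 5.0, np.nan])
    cat_ser = cut(Series(val), bins=3, include_lowest=True)  # categorical-dtype Series
    cat_obj = cat_ser._values                                # the underlying Categorical
    lev = cat_obj.categories                                 # IntervalIndex of bins
    # NaN rows have code -1; allow_fill turns them into the index's NA value.
    lab = lev.take(cat_obj.codes, allow_fill=True, fill_value=lev._na_value)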

@@ -1544,7 +1514,6 @@ def _cython_transform(
**kwargs,
) -> DataFrame:
assert axis == 0 # handled by caller
# TODO: no tests with self.ndim == 1 for DataFrameGroupBy

# With self.axis == 0, we have multi-block tests
# e.g. test_rank_min_int, test_cython_transform_frame
@@ -2058,17 +2027,7 @@ def idxmax(
axis = self.axis

def func(df):
res = df._reduce(
nanops.nanargmax,
"argmax",
axis=axis,
skipna=skipna,
numeric_only=numeric_only,
)
indices = res._values
index = df._get_axis(axis)
result = [index[i] if i >= 0 else np.nan for i in indices]
return df._constructor_sliced(result, index=res.index)
return df.idxmax(axis=axis, skipna=skipna, numeric_only=numeric_only)

func.__name__ = "idxmax"
result = self._python_apply_general(
@@ -2154,17 +2113,7 @@ def idxmin(
axis = self.axis

def func(df):
res = df._reduce(
nanops.nanargmin,
"argmin",
axis=axis,
skipna=skipna,
numeric_only=numeric_only,
)
indices = res._values
index = df._get_axis(axis)
result = [index[i] if i >= 0 else np.nan for i in indices]
return df._constructor_sliced(result, index=res.index)
return df.idxmin(axis=axis, skipna=skipna, numeric_only=numeric_only)

func.__name__ = "idxmin"
result = self._python_apply_general(
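With the hand-rolled _reduce/nanargmax path gone, the per-group function simply delegates to DataFrame.idxmax / DataFrame.idxmin. A sketch with made-up data:

    import pandas as pd

    df = pd.DataFrame(
        {"key": ["a", "a", "b", "b"], "val": [1.0, 3.0, 2.0, 0.0]},
        index=["w", "x", "y", "z"],
    )
    gb = df.groupby("key")

    # Each group now goes through DataFrame.idxmax/idxmin, returning the index
    # label of the per-group extremum: "x"/"y" for idxmax, "w"/"z" for idxmin.
    result_max = gb.idxmax()
    result_min = gb.idxmin()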
18 changes: 1 addition & 17 deletions pandas/core/groupby/groupby.py
@@ -1380,9 +1380,7 @@ def _python_apply_general(
this can be coincidental leading to value-dependent behavior.
is_transform : bool, default False
Indicator for whether the function is actually a transform
and should not have group keys prepended. This is used
in _make_wrapper which generates both transforms (e.g. diff)
and non-transforms (e.g. corr)
and should not have group keys prepended.
is_agg : bool, default False
Indicator for whether the function is an aggregation. When the
result is empty, we don't want to warn for this case.
@@ -4110,15 +4108,8 @@ def get_groupby(
obj: NDFrame,
by: _KeysArgType | None = None,
axis: AxisInt = 0,
level=None,
grouper: ops.BaseGrouper | None = None,
exclusions=None,
selection=None,
as_index: bool = True,
sort: bool = True,
group_keys: bool | lib.NoDefault = True,
observed: bool = False,
dropna: bool = True,
) -> GroupBy:

klass: type[GroupBy]
@@ -4137,15 +4128,8 @@
obj=obj,
keys=by,
axis=axis,
level=level,
grouper=grouper,
exclusions=exclusions,
selection=selection,
as_index=as_index,
sort=sort,
group_keys=group_keys,
observed=observed,
dropna=dropna,
)


9 changes: 4 additions & 5 deletions pandas/core/groupby/ops.py
@@ -743,7 +743,7 @@ def _get_splitter(self, data: NDFrame, axis: AxisInt = 0) -> DataSplitter:
Generator yielding subsetted objects
"""
ids, _, ngroups = self.group_info
return get_splitter(data, ids, ngroups, axis=axis)
return _get_splitter(data, ids, ngroups, axis=axis)

@final
@cache_readonly
@@ -1017,13 +1017,12 @@ def agg_series(
def _aggregate_series_pure_python(
self, obj: Series, func: Callable
) -> npt.NDArray[np.object_]:
ids, _, ngroups = self.group_info
_, _, ngroups = self.group_info

result = np.empty(ngroups, dtype="O")
initialized = False

# equiv: splitter = self._get_splitter(obj, axis=0)
splitter = get_splitter(obj, ids, ngroups, axis=0)
splitter = self._get_splitter(obj, axis=0)

for i, group in enumerate(splitter):
res = func(group)
@@ -1268,7 +1267,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
return df.__finalize__(sdata, method="groupby")


def get_splitter(
def _get_splitter(
data: NDFrame, labels: np.ndarray, ngroups: int, axis: AxisInt = 0
) -> DataSplitter:
if isinstance(data, Series):
4 changes: 0 additions & 4 deletions pandas/core/internals/blocks.py
@@ -261,10 +261,6 @@ def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
new_mgr_locs = self._mgr_locs[slicer]

new_values = self._slice(slicer)

if new_values.ndim != self.values.ndim:
raise ValueError("Only same dim slicing is allowed")

return type(self)(new_values, new_mgr_locs, self.ndim)

@final
4 changes: 1 addition & 3 deletions pandas/core/resample.py
@@ -161,9 +161,7 @@ def __init__(
self._timegrouper = timegrouper
self.keys = None
self.sort = True
# error: Incompatible types in assignment (expression has type "Union
# [int, Literal['index', 'columns', 'rows']]", variable has type "int")
self.axis = axis # type: ignore[assignment]
self.axis = obj._get_axis_number(axis)
self.kind = kind
self.group_keys = group_keys
self.as_index = True
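The type: ignore can be dropped because _get_axis_number normalizes string axis labels to integers before assignment; roughly, using the (private) helper:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]})
    # "index"/0 map to 0 and "columns"/1 map to 1, so Resampler.axis is
    # always stored as an int.
    assert df._get_axis_number("index") == 0
    assert df._get_axis_number("columns") == 1
    assert df._get_axis_number(0) == 0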
1 change: 1 addition & 0 deletions pandas/tests/groupby/transform/test_transform.py
@@ -708,6 +708,7 @@ def test_cython_transform_frame(op, args, targop):
{"by": "string"},
]: # {"by": 'string_missing'}]:
# {"by": ['int','string']}]:
# TODO: remove or enable commented-out code

gb = df.groupby(group_keys=False, **gb_target)

2 changes: 0 additions & 2 deletions pandas/tests/plotting/common.py
@@ -560,8 +560,6 @@ def _check_plot_works(f, default_axes=False, **kwargs):
with tm.ensure_clean(return_filelike=True) as path:
plt.savefig(path)

except Exception as err:
raise err
finally:
tm.close(fig)

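Removing the except Exception as err: raise err block is behavior-preserving: with a bare try/finally the exception still propagates after cleanup runs. A minimal illustration:

    def cleanup_then_propagate():
        try:
            raise ValueError("boom")
        finally:
            print("cleanup ran")  # runs first, then the ValueError still propagates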