CLN: Assorted #51136

Merged · 8 commits · Feb 7, 2023
1 change: 1 addition & 0 deletions pandas/_libs/groupby.pyi
@@ -55,6 +55,7 @@ def group_any_all(
mask: np.ndarray, # const uint8_t[::1]
val_test: Literal["any", "all"],
skipna: bool,
nullable: bool,
) -> None: ...
def group_sum(
out: np.ndarray, # complexfloatingintuint_t[:, ::1]
2 changes: 1 addition & 1 deletion pandas/_libs/parsers.pyx
@@ -2042,7 +2042,7 @@ def _compute_na_values():
np.uint16: uint16info.max,
np.uint8: uint8info.max,
np.bool_: uint8info.max,
np.object_: np.nan # oof
np.object_: np.nan,
}
return na_values

3 changes: 2 additions & 1 deletion pandas/_testing/compat.py
@@ -1,11 +1,12 @@
"""
Helpers for sharing tests between DataFrame/Series
"""
from pandas._typing import DtypeObj

from pandas import DataFrame


def get_dtype(obj):
def get_dtype(obj) -> DtypeObj:
if isinstance(obj, DataFrame):
# Note: we are assuming only one column
return obj.dtypes.iat[0]
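For context, a minimal sketch of what the newly annotated helper returns, assuming a checkout where pandas._testing.compat.get_dtype is importable:

    import pandas as pd
    from pandas._testing.compat import get_dtype

    ser = pd.Series([1, 2, 3], dtype="int64")
    df = ser.to_frame(name="x")

    # For a Series the helper returns ser.dtype; for a DataFrame it assumes a
    # single column and returns that column's dtype, so both are a DtypeObj.
    assert get_dtype(ser) == get_dtype(df)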
8 changes: 5 additions & 3 deletions pandas/conftest.py
@@ -17,6 +17,7 @@
- Dtypes
- Misc
"""
from __future__ import annotations

from collections import abc
from datetime import (
@@ -31,6 +32,7 @@
import os
from typing import (
Callable,
Hashable,
Iterator,
)

@@ -1881,7 +1883,7 @@ def __init__(self, **kwargs) -> None:
(pd.NA, pd.NA, pd.NA),
]
)
def names(request):
def names(request) -> tuple[Hashable, Hashable, Hashable]:
"""
A 3-tuple of names, the first two for operands, the last for a result.
"""
@@ -1937,7 +1939,7 @@ def indexer_ial(request):


@pytest.fixture
def using_array_manager():
def using_array_manager() -> bool:
"""
Fixture to check if the array manager is being used.
"""
@@ -1958,7 +1960,7 @@ def using_copy_on_write() -> bool:


@pytest.fixture(params=warsaws)
def warsaw(request):
def warsaw(request) -> str:
"""
tzinfo for Europe/Warsaw using pytz, dateutil, or zoneinfo.
"""
11 changes: 9 additions & 2 deletions pandas/core/arrays/arrow/dtype.py
@@ -4,7 +4,11 @@

import numpy as np

from pandas._typing import DtypeObj
from pandas._typing import (
TYPE_CHECKING,
DtypeObj,
type_t,
)
from pandas.compat import pa_version_under7p0
from pandas.util._decorators import cache_readonly

@@ -16,6 +20,9 @@
if not pa_version_under7p0:
import pyarrow as pa

if TYPE_CHECKING:
from pandas.core.arrays.arrow import ArrowExtensionArray


@register_extension_dtype
class ArrowDtype(StorageExtensionDtype):
@@ -113,7 +120,7 @@ def itemsize(self) -> int:
return self.numpy_dtype.itemsize

@classmethod
def construct_array_type(cls):
def construct_array_type(cls) -> type_t[ArrowExtensionArray]:
"""
Return the array type associated with this dtype.

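A small sketch of the annotated classmethod, assuming pyarrow >= 7 is installed:

    import pyarrow as pa
    import pandas as pd
    from pandas.core.arrays.arrow import ArrowExtensionArray

    dtype = pd.ArrowDtype(pa.int64())
    # construct_array_type returns the array class itself, which is what the
    # type_t[ArrowExtensionArray] annotation expresses.
    assert dtype.construct_array_type() is ArrowExtensionArray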
71 changes: 10 additions & 61 deletions pandas/core/groupby/generic.py
@@ -66,10 +66,7 @@
notna,
)

from pandas.core import (
algorithms,
nanops,
)
from pandas.core import algorithms
from pandas.core.apply import (
GroupByApply,
maybe_mangle_lambdas,
@@ -98,6 +95,7 @@
from pandas.plotting import boxplot_frame_groupby

if TYPE_CHECKING:
from pandas import Categorical
from pandas.core.generic import NDFrame

# TODO(typing) the return value on this callable should be any *scalar*.
@@ -138,29 +136,6 @@ class NamedAgg(NamedTuple):
aggfunc: AggScalar


def generate_property(name: str, klass: type[DataFrame | Series]):
"""
Create a property for a GroupBy subclass to dispatch to DataFrame/Series.

Parameters
----------
name : str
klass : {DataFrame, Series}

Returns
-------
property
"""

def prop(self):
return self._make_wrapper(name)

parent_method = getattr(klass, name)
prop.__doc__ = parent_method.__doc__ or ""
prop.__name__ = name
return property(prop)


class SeriesGroupBy(GroupBy[Series]):
def _wrap_agged_manager(self, mgr: Manager) -> Series:
return self.obj._constructor(mgr, name=self.obj.name)
@@ -718,18 +693,13 @@ def value_counts(
else:

# lab is a Categorical with categories an IntervalIndex
lab = cut(Series(val), bins, include_lowest=True)
# error: "ndarray" has no attribute "cat"
lev = lab.cat.categories # type: ignore[attr-defined]
# error: No overload variant of "take" of "_ArrayOrScalarCommon" matches
# argument types "Any", "bool", "Union[Any, float]"
lab = lev.take( # type: ignore[call-overload]
# error: "ndarray" has no attribute "cat"
lab.cat.codes, # type: ignore[attr-defined]
cat_ser = cut(Series(val), bins, include_lowest=True)
cat_obj = cast("Categorical", cat_ser._values)
lev = cat_obj.categories
lab = lev.take(
cat_obj.codes,
allow_fill=True,
# error: Item "ndarray" of "Union[ndarray, Index]" has no attribute
# "_na_value"
fill_value=lev._na_value, # type: ignore[union-attr]
fill_value=lev._na_value,
)
llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]
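The rewrite leans on cut returning a Series backed by a Categorical, so casting to Categorical makes the .categories/.codes access explicit instead of silencing mypy. A rough standalone sketch of the same pattern, with made-up values:

    import numpy as np
    from pandas import Series, cut

    val = np.array([1.0, 2.0, 5.0, np.nan])
    cat_ser = cut(Series(val), bins=3, include_lowest=True)  # categorical-dtype Series
    cat_obj = cat_ser._values                                # the underlying Categorical
    lev = cat_obj.categories                                 # IntervalIndex of bins
    # NaN rows have code -1; allow_fill turns them into the index's NA value.
    lab = lev.take(cat_obj.codes, allow_fill=True, fill_value=lev._na_value)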

@@ -1544,7 +1514,6 @@ def _cython_transform(
**kwargs,
) -> DataFrame:
assert axis == 0 # handled by caller
# TODO: no tests with self.ndim == 1 for DataFrameGroupBy

# With self.axis == 0, we have multi-block tests
# e.g. test_rank_min_int, test_cython_transform_frame
@@ -2058,17 +2027,7 @@ def idxmax(
axis = self.axis

def func(df):
res = df._reduce(
nanops.nanargmax,
"argmax",
axis=axis,
skipna=skipna,
numeric_only=numeric_only,
)
indices = res._values
index = df._get_axis(axis)
result = [index[i] if i >= 0 else np.nan for i in indices]
return df._constructor_sliced(result, index=res.index)
return df.idxmax(axis=axis, skipna=skipna, numeric_only=numeric_only)

func.__name__ = "idxmax"
result = self._python_apply_general(
@@ -2154,17 +2113,7 @@ def idxmin(
axis = self.axis

def func(df):
res = df._reduce(
nanops.nanargmin,
"argmin",
axis=axis,
skipna=skipna,
numeric_only=numeric_only,
)
indices = res._values
index = df._get_axis(axis)
result = [index[i] if i >= 0 else np.nan for i in indices]
return df._constructor_sliced(result, index=res.index)
return df.idxmin(axis=axis, skipna=skipna, numeric_only=numeric_only)

func.__name__ = "idxmin"
result = self._python_apply_general(
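With the hand-rolled _reduce/nanargmax path gone, the per-group function simply delegates to DataFrame.idxmax / DataFrame.idxmin. A sketch with made-up data:

    import pandas as pd

    df = pd.DataFrame(
        {"key": ["a", "a", "b", "b"], "val": [1.0, 3.0, 2.0, 0.0]},
        index=["w", "x", "y", "z"],
    )
    gb = df.groupby("key")

    # Each group now goes through DataFrame.idxmax/idxmin, returning the index
    # label of the per-group extremum: "x"/"y" for idxmax, "w"/"z" for idxmin.
    result_max = gb.idxmax()
    result_min = gb.idxmin()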
18 changes: 1 addition & 17 deletions pandas/core/groupby/groupby.py
@@ -1380,9 +1380,7 @@ def _python_apply_general(
this can be coincidental leading to value-dependent behavior.
is_transform : bool, default False
Indicator for whether the function is actually a transform
and should not have group keys prepended. This is used
in _make_wrapper which generates both transforms (e.g. diff)
and non-transforms (e.g. corr)
and should not have group keys prepended.
is_agg : bool, default False
Indicator for whether the function is an aggregation. When the
result is empty, we don't want to warn for this case.
@@ -4110,15 +4108,8 @@ def get_groupby(
obj: NDFrame,
by: _KeysArgType | None = None,
axis: AxisInt = 0,
level=None,
grouper: ops.BaseGrouper | None = None,
exclusions=None,
selection=None,
as_index: bool = True,
sort: bool = True,
group_keys: bool | lib.NoDefault = True,
observed: bool = False,
dropna: bool = True,
) -> GroupBy:

klass: type[GroupBy]
@@ -4137,15 +4128,8 @@
obj=obj,
keys=by,
axis=axis,
level=level,
grouper=grouper,
exclusions=exclusions,
selection=selection,
as_index=as_index,
sort=sort,
group_keys=group_keys,
observed=observed,
dropna=dropna,
)


9 changes: 4 additions & 5 deletions pandas/core/groupby/ops.py
@@ -743,7 +743,7 @@ def _get_splitter(self, data: NDFrame, axis: AxisInt = 0) -> DataSplitter:
Generator yielding subsetted objects
"""
ids, _, ngroups = self.group_info
return get_splitter(data, ids, ngroups, axis=axis)
return _get_splitter(data, ids, ngroups, axis=axis)

@final
@cache_readonly
@@ -1017,13 +1017,12 @@ def agg_series(
def _aggregate_series_pure_python(
self, obj: Series, func: Callable
) -> npt.NDArray[np.object_]:
ids, _, ngroups = self.group_info
_, _, ngroups = self.group_info

result = np.empty(ngroups, dtype="O")
initialized = False

# equiv: splitter = self._get_splitter(obj, axis=0)
splitter = get_splitter(obj, ids, ngroups, axis=0)
splitter = self._get_splitter(obj, axis=0)

for i, group in enumerate(splitter):
res = func(group)
@@ -1268,7 +1267,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
return df.__finalize__(sdata, method="groupby")


def get_splitter(
def _get_splitter(
data: NDFrame, labels: np.ndarray, ngroups: int, axis: AxisInt = 0
) -> DataSplitter:
if isinstance(data, Series):
4 changes: 0 additions & 4 deletions pandas/core/internals/blocks.py
@@ -261,10 +261,6 @@ def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
new_mgr_locs = self._mgr_locs[slicer]

new_values = self._slice(slicer)

if new_values.ndim != self.values.ndim:
raise ValueError("Only same dim slicing is allowed")

return type(self)(new_values, new_mgr_locs, self.ndim)

@final
4 changes: 1 addition & 3 deletions pandas/core/resample.py
@@ -161,9 +161,7 @@ def __init__(
self._timegrouper = timegrouper
self.keys = None
self.sort = True
# error: Incompatible types in assignment (expression has type "Union
# [int, Literal['index', 'columns', 'rows']]", variable has type "int")
self.axis = axis # type: ignore[assignment]
self.axis = obj._get_axis_number(axis)
self.kind = kind
self.group_keys = group_keys
self.as_index = True
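The type: ignore can be dropped because _get_axis_number normalizes string axis labels to integers before assignment; roughly, using the (private) helper:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]})
    # "index"/0 map to 0 and "columns"/1 map to 1, so Resampler.axis is
    # always stored as an int.
    assert df._get_axis_number("index") == 0
    assert df._get_axis_number("columns") == 1
    assert df._get_axis_number(0) == 0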
1 change: 1 addition & 0 deletions pandas/tests/groupby/transform/test_transform.py
@@ -708,6 +708,7 @@ def test_cython_transform_frame(op, args, targop):
{"by": "string"},
]: # {"by": 'string_missing'}]:
# {"by": ['int','string']}]:
# TODO: remove or enable commented-out code

gb = df.groupby(group_keys=False, **gb_target)

2 changes: 0 additions & 2 deletions pandas/tests/plotting/common.py
@@ -560,8 +560,6 @@ def _check_plot_works(f, default_axes=False, **kwargs):
with tm.ensure_clean(return_filelike=True) as path:
plt.savefig(path)

except Exception as err:
raise err
finally:
tm.close(fig)

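Removing the except Exception as err: raise err block is behavior-preserving: with a bare try/finally the exception still propagates after cleanup runs. A minimal illustration:

    def cleanup_then_propagate():
        try:
            raise ValueError("boom")
        finally:
            print("cleanup ran")  # runs first, then the ValueError still propagates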