Skip to content

Commit 6a83d3c

Browse files
authored
CLN: Assorted (#51136)
* CLN: Assorted
* fixup
* revert
1 parent 44c30ac commit 6a83d3c

File tree

12 files changed

+35
-99
lines changed

12 files changed

+35
-99
lines changed

pandas/_libs/groupby.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def group_any_all(
5555
mask: np.ndarray, # const uint8_t[::1]
5656
val_test: Literal["any", "all"],
5757
skipna: bool,
58+
nullable: bool,
5859
) -> None: ...
5960
def group_sum(
6061
out: np.ndarray, # complexfloatingintuint_t[:, ::1]

pandas/_libs/parsers.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2042,7 +2042,7 @@ def _compute_na_values():
20422042
np.uint16: uint16info.max,
20432043
np.uint8: uint8info.max,
20442044
np.bool_: uint8info.max,
2045-
np.object_: np.nan # oof
2045+
np.object_: np.nan,
20462046
}
20472047
return na_values
20482048

pandas/_testing/compat.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""
22
Helpers for sharing tests between DataFrame/Series
33
"""
4+
from pandas._typing import DtypeObj
45

56
from pandas import DataFrame
67

78

8-
def get_dtype(obj):
9+
def get_dtype(obj) -> DtypeObj:
910
if isinstance(obj, DataFrame):
1011
# Note: we are assuming only one column
1112
return obj.dtypes.iat[0]

pandas/conftest.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
- Dtypes
1818
- Misc
1919
"""
20+
from __future__ import annotations
2021

2122
from collections import abc
2223
from datetime import (
@@ -31,6 +32,7 @@
3132
import os
3233
from typing import (
3334
Callable,
35+
Hashable,
3436
Iterator,
3537
)
3638

@@ -1881,7 +1883,7 @@ def __init__(self, **kwargs) -> None:
18811883
(pd.NA, pd.NA, pd.NA),
18821884
]
18831885
)
1884-
def names(request):
1886+
def names(request) -> tuple[Hashable, Hashable, Hashable]:
18851887
"""
18861888
A 3-tuple of names, the first two for operands, the last for a result.
18871889
"""
@@ -1937,7 +1939,7 @@ def indexer_ial(request):
19371939

19381940

19391941
@pytest.fixture
1940-
def using_array_manager():
1942+
def using_array_manager() -> bool:
19411943
"""
19421944
Fixture to check if the array manager is being used.
19431945
"""
@@ -1958,7 +1960,7 @@ def using_copy_on_write() -> bool:
19581960

19591961

19601962
@pytest.fixture(params=warsaws)
1961-
def warsaw(request):
1963+
def warsaw(request) -> str:
19621964
"""
19631965
tzinfo for Europe/Warsaw using pytz, dateutil, or zoneinfo.
19641966
"""

pandas/core/arrays/arrow/dtype.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@
44

55
import numpy as np
66

7-
from pandas._typing import DtypeObj
7+
from pandas._typing import (
8+
TYPE_CHECKING,
9+
DtypeObj,
10+
type_t,
11+
)
812
from pandas.compat import pa_version_under7p0
913
from pandas.util._decorators import cache_readonly
1014

@@ -16,6 +20,9 @@
1620
if not pa_version_under7p0:
1721
import pyarrow as pa
1822

23+
if TYPE_CHECKING:
24+
from pandas.core.arrays.arrow import ArrowExtensionArray
25+
1926

2027
@register_extension_dtype
2128
class ArrowDtype(StorageExtensionDtype):
@@ -113,7 +120,7 @@ def itemsize(self) -> int:
113120
return self.numpy_dtype.itemsize
114121

115122
@classmethod
116-
def construct_array_type(cls):
123+
def construct_array_type(cls) -> type_t[ArrowExtensionArray]:
117124
"""
118125
Return the array type associated with this dtype.
119126

pandas/core/groupby/generic.py

Lines changed: 10 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,7 @@
6666
notna,
6767
)
6868

69-
from pandas.core import (
70-
algorithms,
71-
nanops,
72-
)
69+
from pandas.core import algorithms
7370
from pandas.core.apply import (
7471
GroupByApply,
7572
maybe_mangle_lambdas,
@@ -98,6 +95,7 @@
9895
from pandas.plotting import boxplot_frame_groupby
9996

10097
if TYPE_CHECKING:
98+
from pandas import Categorical
10199
from pandas.core.generic import NDFrame
102100

103101
# TODO(typing) the return value on this callable should be any *scalar*.
@@ -138,29 +136,6 @@ class NamedAgg(NamedTuple):
138136
aggfunc: AggScalar
139137

140138

141-
def generate_property(name: str, klass: type[DataFrame | Series]):
142-
"""
143-
Create a property for a GroupBy subclass to dispatch to DataFrame/Series.
144-
145-
Parameters
146-
----------
147-
name : str
148-
klass : {DataFrame, Series}
149-
150-
Returns
151-
-------
152-
property
153-
"""
154-
155-
def prop(self):
156-
return self._make_wrapper(name)
157-
158-
parent_method = getattr(klass, name)
159-
prop.__doc__ = parent_method.__doc__ or ""
160-
prop.__name__ = name
161-
return property(prop)
162-
163-
164139
class SeriesGroupBy(GroupBy[Series]):
165140
def _wrap_agged_manager(self, mgr: Manager) -> Series:
166141
return self.obj._constructor(mgr, name=self.obj.name)
@@ -718,18 +693,13 @@ def value_counts(
718693
else:
719694

720695
# lab is a Categorical with categories an IntervalIndex
721-
lab = cut(Series(val), bins, include_lowest=True)
722-
# error: "ndarray" has no attribute "cat"
723-
lev = lab.cat.categories # type: ignore[attr-defined]
724-
# error: No overload variant of "take" of "_ArrayOrScalarCommon" matches
725-
# argument types "Any", "bool", "Union[Any, float]"
726-
lab = lev.take( # type: ignore[call-overload]
727-
# error: "ndarray" has no attribute "cat"
728-
lab.cat.codes, # type: ignore[attr-defined]
696+
cat_ser = cut(Series(val), bins, include_lowest=True)
697+
cat_obj = cast("Categorical", cat_ser._values)
698+
lev = cat_obj.categories
699+
lab = lev.take(
700+
cat_obj.codes,
729701
allow_fill=True,
730-
# error: Item "ndarray" of "Union[ndarray, Index]" has no attribute
731-
# "_na_value"
732-
fill_value=lev._na_value, # type: ignore[union-attr]
702+
fill_value=lev._na_value,
733703
)
734704
llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]
735705

@@ -1544,7 +1514,6 @@ def _cython_transform(
15441514
**kwargs,
15451515
) -> DataFrame:
15461516
assert axis == 0 # handled by caller
1547-
# TODO: no tests with self.ndim == 1 for DataFrameGroupBy
15481517

15491518
# With self.axis == 0, we have multi-block tests
15501519
# e.g. test_rank_min_int, test_cython_transform_frame
@@ -2058,17 +2027,7 @@ def idxmax(
20582027
axis = self.axis
20592028

20602029
def func(df):
2061-
res = df._reduce(
2062-
nanops.nanargmax,
2063-
"argmax",
2064-
axis=axis,
2065-
skipna=skipna,
2066-
numeric_only=numeric_only,
2067-
)
2068-
indices = res._values
2069-
index = df._get_axis(axis)
2070-
result = [index[i] if i >= 0 else np.nan for i in indices]
2071-
return df._constructor_sliced(result, index=res.index)
2030+
return df.idxmax(axis=axis, skipna=skipna, numeric_only=numeric_only)
20722031

20732032
func.__name__ = "idxmax"
20742033
result = self._python_apply_general(
@@ -2154,17 +2113,7 @@ def idxmin(
21542113
axis = self.axis
21552114

21562115
def func(df):
2157-
res = df._reduce(
2158-
nanops.nanargmin,
2159-
"argmin",
2160-
axis=axis,
2161-
skipna=skipna,
2162-
numeric_only=numeric_only,
2163-
)
2164-
indices = res._values
2165-
index = df._get_axis(axis)
2166-
result = [index[i] if i >= 0 else np.nan for i in indices]
2167-
return df._constructor_sliced(result, index=res.index)
2116+
return df.idxmin(axis=axis, skipna=skipna, numeric_only=numeric_only)
21682117

21692118
func.__name__ = "idxmin"
21702119
result = self._python_apply_general(

pandas/core/groupby/groupby.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1380,9 +1380,7 @@ def _python_apply_general(
13801380
this can be coincidental leading to value-dependent behavior.
13811381
is_transform : bool, default False
13821382
Indicator for whether the function is actually a transform
1383-
and should not have group keys prepended. This is used
1384-
in _make_wrapper which generates both transforms (e.g. diff)
1385-
and non-transforms (e.g. corr)
1383+
and should not have group keys prepended.
13861384
is_agg : bool, default False
13871385
Indicator for whether the function is an aggregation. When the
13881386
result is empty, we don't want to warn for this case.
@@ -4110,15 +4108,8 @@ def get_groupby(
41104108
obj: NDFrame,
41114109
by: _KeysArgType | None = None,
41124110
axis: AxisInt = 0,
4113-
level=None,
41144111
grouper: ops.BaseGrouper | None = None,
4115-
exclusions=None,
4116-
selection=None,
4117-
as_index: bool = True,
4118-
sort: bool = True,
41194112
group_keys: bool | lib.NoDefault = True,
4120-
observed: bool = False,
4121-
dropna: bool = True,
41224113
) -> GroupBy:
41234114

41244115
klass: type[GroupBy]
@@ -4137,15 +4128,8 @@ def get_groupby(
41374128
obj=obj,
41384129
keys=by,
41394130
axis=axis,
4140-
level=level,
41414131
grouper=grouper,
4142-
exclusions=exclusions,
4143-
selection=selection,
4144-
as_index=as_index,
4145-
sort=sort,
41464132
group_keys=group_keys,
4147-
observed=observed,
4148-
dropna=dropna,
41494133
)
41504134

41514135

pandas/core/groupby/ops.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,7 @@ def _get_splitter(self, data: NDFrame, axis: AxisInt = 0) -> DataSplitter:
743743
Generator yielding subsetted objects
744744
"""
745745
ids, _, ngroups = self.group_info
746-
return get_splitter(data, ids, ngroups, axis=axis)
746+
return _get_splitter(data, ids, ngroups, axis=axis)
747747

748748
@final
749749
@cache_readonly
@@ -1017,13 +1017,12 @@ def agg_series(
10171017
def _aggregate_series_pure_python(
10181018
self, obj: Series, func: Callable
10191019
) -> npt.NDArray[np.object_]:
1020-
ids, _, ngroups = self.group_info
1020+
_, _, ngroups = self.group_info
10211021

10221022
result = np.empty(ngroups, dtype="O")
10231023
initialized = False
10241024

1025-
# equiv: splitter = self._get_splitter(obj, axis=0)
1026-
splitter = get_splitter(obj, ids, ngroups, axis=0)
1025+
splitter = self._get_splitter(obj, axis=0)
10271026

10281027
for i, group in enumerate(splitter):
10291028
res = func(group)
@@ -1268,7 +1267,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
12681267
return df.__finalize__(sdata, method="groupby")
12691268

12701269

1271-
def get_splitter(
1270+
def _get_splitter(
12721271
data: NDFrame, labels: np.ndarray, ngroups: int, axis: AxisInt = 0
12731272
) -> DataSplitter:
12741273
if isinstance(data, Series):

pandas/core/internals/blocks.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -261,10 +261,6 @@ def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
261261
new_mgr_locs = self._mgr_locs[slicer]
262262

263263
new_values = self._slice(slicer)
264-
265-
if new_values.ndim != self.values.ndim:
266-
raise ValueError("Only same dim slicing is allowed")
267-
268264
return type(self)(new_values, new_mgr_locs, self.ndim)
269265

270266
@final

pandas/core/resample.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,7 @@ def __init__(
161161
self._timegrouper = timegrouper
162162
self.keys = None
163163
self.sort = True
164-
# error: Incompatible types in assignment (expression has type "Union
165-
# [int, Literal['index', 'columns', 'rows']]", variable has type "int")
166-
self.axis = axis # type: ignore[assignment]
164+
self.axis = obj._get_axis_number(axis)
167165
self.kind = kind
168166
self.group_keys = group_keys
169167
self.as_index = True

pandas/tests/groupby/transform/test_transform.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,7 @@ def test_cython_transform_frame(op, args, targop):
708708
{"by": "string"},
709709
]: # {"by": 'string_missing'}]:
710710
# {"by": ['int','string']}]:
711+
# TODO: remove or enable commented-out code
711712

712713
gb = df.groupby(group_keys=False, **gb_target)
713714

pandas/tests/plotting/common.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -560,8 +560,6 @@ def _check_plot_works(f, default_axes=False, **kwargs):
560560
with tm.ensure_clean(return_filelike=True) as path:
561561
plt.savefig(path)
562562

563-
except Exception as err:
564-
raise err
565563
finally:
566564
tm.close(fig)
567565

0 commit comments

Comments (0)