Skip to content

Commit 60b8f05

Browse files
authored
CLN: Clean groupby/test_function.py (#32027)
1 parent 96644d0 commit 60b8f05

File tree

1 file changed

+134
-89
lines changed

1 file changed

+134
-89
lines changed

pandas/tests/groupby/test_function.py

Lines changed: 134 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,26 @@
2626
from pandas.util import _test_decorators as td
2727

2828

29+
@pytest.fixture(
    params=[np.int32, np.int64, np.float32, np.float64],
    ids=["np.int32", "np.int64", "np.float32", "np.float64"],
)
def numpy_dtypes_for_minmax(request):
    """
    Fixture pairing a numpy dtype with its smallest and largest values.

    Returns a ``(dtype, min_val, max_val)`` tuple for the parametrized dtype;
    used for testing cummin and cummax.
    """
    dtype = request.param

    # Signed-integer dtypes are described by iinfo; the other params are
    # floating-point dtypes, which are described by finfo.
    if np.dtype(dtype).kind == "i":
        limits = np.iinfo(dtype)
    else:
        limits = np.finfo(dtype)

    return (dtype, limits.min, limits.max)
47+
48+
2949
@pytest.mark.parametrize("agg_func", ["any", "all"])
3050
@pytest.mark.parametrize("skipna", [True, False])
3151
@pytest.mark.parametrize(
@@ -174,11 +194,10 @@ def test_arg_passthru():
174194
)
175195

176196
for attr in ["mean", "median"]:
177-
f = getattr(df.groupby("group"), attr)
178-
result = f()
197+
result = getattr(df.groupby("group"), attr)()
179198
tm.assert_index_equal(result.columns, expected_columns_numeric)
180199

181-
result = f(numeric_only=False)
200+
result = getattr(df.groupby("group"), attr)(numeric_only=False)
182201
tm.assert_frame_equal(result.reindex_like(expected), expected)
183202

184203
# TODO: min, max *should* handle
@@ -195,11 +214,10 @@ def test_arg_passthru():
195214
]
196215
)
197216
for attr in ["min", "max"]:
198-
f = getattr(df.groupby("group"), attr)
199-
result = f()
217+
result = getattr(df.groupby("group"), attr)()
200218
tm.assert_index_equal(result.columns, expected_columns)
201219

202-
result = f(numeric_only=False)
220+
result = getattr(df.groupby("group"), attr)(numeric_only=False)
203221
tm.assert_index_equal(result.columns, expected_columns)
204222

205223
expected_columns = Index(
@@ -215,52 +233,47 @@ def test_arg_passthru():
215233
]
216234
)
217235
for attr in ["first", "last"]:
218-
f = getattr(df.groupby("group"), attr)
219-
result = f()
236+
result = getattr(df.groupby("group"), attr)()
220237
tm.assert_index_equal(result.columns, expected_columns)
221238

222-
result = f(numeric_only=False)
239+
result = getattr(df.groupby("group"), attr)(numeric_only=False)
223240
tm.assert_index_equal(result.columns, expected_columns)
224241

225242
expected_columns = Index(["int", "float", "string", "category_int", "timedelta"])
226-
for attr in ["sum"]:
227-
f = getattr(df.groupby("group"), attr)
228-
result = f()
229-
tm.assert_index_equal(result.columns, expected_columns_numeric)
230243

231-
result = f(numeric_only=False)
232-
tm.assert_index_equal(result.columns, expected_columns)
244+
result = df.groupby("group").sum()
245+
tm.assert_index_equal(result.columns, expected_columns_numeric)
246+
247+
result = df.groupby("group").sum(numeric_only=False)
248+
tm.assert_index_equal(result.columns, expected_columns)
233249

234250
expected_columns = Index(["int", "float", "category_int"])
235251
for attr in ["prod", "cumprod"]:
236-
f = getattr(df.groupby("group"), attr)
237-
result = f()
252+
result = getattr(df.groupby("group"), attr)()
238253
tm.assert_index_equal(result.columns, expected_columns_numeric)
239254

240-
result = f(numeric_only=False)
255+
result = getattr(df.groupby("group"), attr)(numeric_only=False)
241256
tm.assert_index_equal(result.columns, expected_columns)
242257

243258
# like min, max, but don't include strings
244259
expected_columns = Index(
245260
["int", "float", "category_int", "datetime", "datetimetz", "timedelta"]
246261
)
247262
for attr in ["cummin", "cummax"]:
248-
f = getattr(df.groupby("group"), attr)
249-
result = f()
263+
result = getattr(df.groupby("group"), attr)()
250264
# GH 15561: numeric_only=False set by default like min/max
251265
tm.assert_index_equal(result.columns, expected_columns)
252266

253-
result = f(numeric_only=False)
267+
result = getattr(df.groupby("group"), attr)(numeric_only=False)
254268
tm.assert_index_equal(result.columns, expected_columns)
255269

256270
expected_columns = Index(["int", "float", "category_int", "timedelta"])
257-
for attr in ["cumsum"]:
258-
f = getattr(df.groupby("group"), attr)
259-
result = f()
260-
tm.assert_index_equal(result.columns, expected_columns_numeric)
261271

262-
result = f(numeric_only=False)
263-
tm.assert_index_equal(result.columns, expected_columns)
272+
result = getattr(df.groupby("group"), "cumsum")()
273+
tm.assert_index_equal(result.columns, expected_columns_numeric)
274+
275+
result = getattr(df.groupby("group"), "cumsum")(numeric_only=False)
276+
tm.assert_index_equal(result.columns, expected_columns)
264277

265278

266279
def test_non_cython_api():
@@ -691,59 +704,31 @@ def test_numpy_compat(func):
691704
reason="https://github.com/pandas-dev/pandas/issues/31992",
692705
strict=False,
693706
)
694-
def test_cummin_cummax():
707+
def test_cummin(numpy_dtypes_for_minmax):
708+
dtype = numpy_dtypes_for_minmax[0]
709+
min_val = numpy_dtypes_for_minmax[1]
710+
695711
# GH 15048
696-
num_types = [np.int32, np.int64, np.float32, np.float64]
697-
num_mins = [
698-
np.iinfo(np.int32).min,
699-
np.iinfo(np.int64).min,
700-
np.finfo(np.float32).min,
701-
np.finfo(np.float64).min,
702-
]
703-
num_max = [
704-
np.iinfo(np.int32).max,
705-
np.iinfo(np.int64).max,
706-
np.finfo(np.float32).max,
707-
np.finfo(np.float64).max,
708-
]
709712
base_df = pd.DataFrame(
710713
{"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]}
711714
)
712715
expected_mins = [3, 3, 3, 2, 2, 2, 2, 1]
713-
expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3]
714716

715-
for dtype, min_val, max_val in zip(num_types, num_mins, num_max):
716-
df = base_df.astype(dtype)
717+
df = base_df.astype(dtype)
717718

718-
# cummin
719-
expected = pd.DataFrame({"B": expected_mins}).astype(dtype)
720-
result = df.groupby("A").cummin()
721-
tm.assert_frame_equal(result, expected)
722-
result = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame()
723-
tm.assert_frame_equal(result, expected)
724-
725-
# Test cummin w/ min value for dtype
726-
df.loc[[2, 6], "B"] = min_val
727-
expected.loc[[2, 3, 6, 7], "B"] = min_val
728-
result = df.groupby("A").cummin()
729-
tm.assert_frame_equal(result, expected)
730-
expected = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame()
731-
tm.assert_frame_equal(result, expected)
732-
733-
# cummax
734-
expected = pd.DataFrame({"B": expected_maxs}).astype(dtype)
735-
result = df.groupby("A").cummax()
736-
tm.assert_frame_equal(result, expected)
737-
result = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame()
738-
tm.assert_frame_equal(result, expected)
719+
expected = pd.DataFrame({"B": expected_mins}).astype(dtype)
720+
result = df.groupby("A").cummin()
721+
tm.assert_frame_equal(result, expected)
722+
result = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame()
723+
tm.assert_frame_equal(result, expected)
739724

740-
# Test cummax w/ max value for dtype
741-
df.loc[[2, 6], "B"] = max_val
742-
expected.loc[[2, 3, 6, 7], "B"] = max_val
743-
result = df.groupby("A").cummax()
744-
tm.assert_frame_equal(result, expected)
745-
expected = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame()
746-
tm.assert_frame_equal(result, expected)
725+
# Test w/ min value for dtype
726+
df.loc[[2, 6], "B"] = min_val
727+
expected.loc[[2, 3, 6, 7], "B"] = min_val
728+
result = df.groupby("A").cummin()
729+
tm.assert_frame_equal(result, expected)
730+
expected = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame()
731+
tm.assert_frame_equal(result, expected)
747732

748733
# Test nan in some values
749734
base_df.loc[[0, 2, 4, 6], "B"] = np.nan
@@ -753,41 +738,101 @@ def test_cummin_cummax():
753738
expected = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame()
754739
tm.assert_frame_equal(result, expected)
755740

756-
expected = pd.DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]})
757-
result = base_df.groupby("A").cummax()
758-
tm.assert_frame_equal(result, expected)
759-
expected = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame()
760-
tm.assert_frame_equal(result, expected)
741+
# GH 15561
742+
df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(["2001"])))
743+
expected = pd.Series(pd.to_datetime("2001"), index=[0], name="b")
744+
745+
result = df.groupby("a")["b"].cummin()
746+
tm.assert_series_equal(expected, result)
747+
748+
# GH 15635
749+
df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2]))
750+
result = df.groupby("a").b.cummin()
751+
expected = pd.Series([1, 2, 1], name="b")
752+
tm.assert_series_equal(result, expected)
753+
754+
755+
@pytest.mark.xfail(
    _is_numpy_dev,
    reason="https://github.com/pandas-dev/pandas/issues/31992",
    strict=False,
)
def test_cummin_all_nan_column():
    """Check that grouped cummin of an all-NaN column yields all NaN."""
    base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8})

    expected = pd.DataFrame({"B": [np.nan] * 8})
    result = base_df.groupby("A").cummin()
    # Keep the (result, expected) order used by the sibling tests so that
    # failure output labels the two frames consistently.
    tm.assert_frame_equal(result, expected)
    # Applying Series.cummin per group must give the same frame.
    result = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame()
    tm.assert_frame_equal(result, expected)
768+
769+
770+
@pytest.mark.xfail(
    _is_numpy_dev,
    reason="https://github.com/pandas-dev/pandas/issues/31992",
    strict=False,
)
def test_cummax(numpy_dtypes_for_minmax):
    """
    Check grouped cummax against known values and against per-group apply.

    ``numpy_dtypes_for_minmax`` supplies ``(dtype, min_val, max_val)``; only
    the dtype and its maximum value are used here.
    """
    dtype, _, max_val = numpy_dtypes_for_minmax

    # GH 15048
    base_df = pd.DataFrame(
        {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]}
    )
    expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3]

    df = base_df.astype(dtype)

    expected = pd.DataFrame({"B": expected_maxs}).astype(dtype)
    result = df.groupby("A").cummax()
    tm.assert_frame_equal(result, expected)
    result = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame()
    tm.assert_frame_equal(result, expected)

    # Test w/ max value for dtype: once the maximum appears in a group,
    # every later row of that group must carry it.
    df.loc[[2, 6], "B"] = max_val
    expected.loc[[2, 3, 6, 7], "B"] = max_val
    result = df.groupby("A").cummax()
    tm.assert_frame_equal(result, expected)
    expected = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame()
    tm.assert_frame_equal(result, expected)

    # Test nan in some values
    base_df.loc[[0, 2, 4, 6], "B"] = np.nan
    expected = pd.DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]})
    result = base_df.groupby("A").cummax()
    tm.assert_frame_equal(result, expected)
    expected = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame()
    tm.assert_frame_equal(result, expected)

    # GH 15561: cummax on a datetime column
    df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(["2001"])))
    expected = pd.Series(pd.to_datetime("2001"), index=[0], name="b")

    result = df.groupby("a")["b"].cummax()
    # (result, expected) order, matching every other assertion in this test.
    tm.assert_series_equal(result, expected)

    # GH 15635
    df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1]))
    result = df.groupby("a").b.cummax()
    expected = pd.Series([2, 1, 2], name="b")
    tm.assert_series_equal(result, expected)
786821

787-
df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2]))
788-
result = df.groupby("a").b.cummin()
789-
expected = pd.Series([1, 2, 1], name="b")
790-
tm.assert_series_equal(result, expected)
822+
823+
@pytest.mark.xfail(
    _is_numpy_dev,
    reason="https://github.com/pandas-dev/pandas/issues/31992",
    strict=False,
)
def test_cummax_all_nan_column():
    """Check that grouped cummax of an all-NaN column yields all NaN."""
    base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [np.nan] * 8})

    expected = pd.DataFrame({"B": [np.nan] * 8})
    result = base_df.groupby("A").cummax()
    # Keep the (result, expected) order used by the sibling tests so that
    # failure output labels the two frames consistently.
    tm.assert_frame_equal(result, expected)
    # Applying Series.cummax per group must give the same frame.
    result = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame()
    tm.assert_frame_equal(result, expected)
791836

792837

793838
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)