Skip to content

DEPR/ENH: support axis=None in min/max #45072

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 28, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,10 +521,12 @@ def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwar

if "axis" not in kwargs:
# For DataFrame reductions we don't want the default axis=0
# FIXME: DataFrame.min ignores axis=None
# FIXME: np.minimum.reduce(df) gets here bc axis is not in kwargs,
# but np.minimum.reduce(df.values) behaves as if axis=0
kwargs["axis"] = None
# Note: np.min is not a ufunc, but uses array_function_dispatch,
# so calls DataFrame.min (without ever getting here) with the np.min
# default of axis=None, which DataFrame.min catches and changes to axis=0.
# np.minimum.reduce(df) gets here because axis is not in kwargs,
# so we set axis=0 to match the behavior of np.minimum.reduce(df.values)
kwargs["axis"] = 0

# By default, numpy's reductions do not skip NaNs, so we have to
# pass skipna=False
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,14 @@ def require_length_match(data, index: Index):
)


_builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min}
# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
# whereas np.min and np.max (which directly call obj.min and obj.max)
# default to axis=None.
_builtin_table = {
builtins.sum: np.sum,
builtins.max: np.maximum.reduce,
builtins.min: np.minimum.reduce,
}

_cython_table = {
builtins.sum: "sum",
Expand Down
81 changes: 73 additions & 8 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10546,26 +10546,74 @@ def _stat_function(

def min(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
level: Level | None = None,
numeric_only: bool_t | None = None,
**kwargs,
):
if axis is None and level is None and self.ndim > 1:
# user must have explicitly passed axis=None
# GH#21597
warnings.warn(
"In a future version, DataFrame.min(axis=None) will return a scalar "
"minimum over the entire DataFrame. To retain the old behavior, "
"use 'frame.min(axis=0)' or just 'frame.min()'",
FutureWarning,
stacklevel=find_stack_level(),
)

if axis is lib.no_default:
# Until we can implement axis=None for all _stat_function methods,
# we change back to axis=None here.
axis = None
return self._stat_function(
"min", nanops.nanmin, axis, skipna, level, numeric_only, **kwargs
"min",
nanops.nanmin,
# error: Argument 3 to "_stat_function" of "NDFrame" has incompatible
# type "Union[Union[str, int], None, NoDefault]"; expected
# "Optional[Union[str, int]]"
axis, # type: ignore[arg-type]
skipna,
level,
numeric_only,
**kwargs,
)

def max(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
level: Level | None = None,
numeric_only: bool_t | None = None,
**kwargs,
):
if axis is None and level is None and self.ndim > 1:
# user must have explicitly passed axis=None
# GH#21597
warnings.warn(
"In a future version, DataFrame.max(axis=None) will return a scalar "
"maximum over the entire DataFrame. To retain the old behavior, "
"use 'frame.max(axis=0)' or just 'frame.max()'",
FutureWarning,
stacklevel=find_stack_level(),
)

if axis is lib.no_default:
# Until we can implement axis=None for all _stat_function methods,
# we change back to axis=None here.
axis = None
return self._stat_function(
"max", nanops.nanmax, axis, skipna, level, numeric_only, **kwargs
"max",
nanops.nanmax,
# error: Argument 3 to "_stat_function" of "NDFrame" has incompatible
# type "Union[Union[str, int], None, NoDefault]"; expected
# "Optional[Union[str, int]]"
axis, # type: ignore[arg-type]
skipna,
level,
numeric_only,
**kwargs,
)

def mean(
Expand Down Expand Up @@ -10657,6 +10705,7 @@ def _min_count_stat_function(
min_count=min_count,
numeric_only=numeric_only,
)

return self._reduce(
func,
name=name,
Expand Down Expand Up @@ -11053,7 +11102,8 @@ def median(

setattr(cls, "median", median)

@doc(
# error: Untyped decorator makes function "max" untyped
@doc( # type: ignore[misc]
_num_doc,
desc="Return the maximum of the values over the requested axis.\n\n"
"If you want the *index* of the maximum, use ``idxmax``. This is "
Expand All @@ -11065,12 +11115,20 @@ def median(
see_also=_stat_func_see_also,
examples=_max_examples,
)
def max(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
def max(
self,
axis: int | None | lib.NoDefault = lib.no_default,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@simonjayhawkins is this going to render in the docs in a way we don't want?

(similar question bugs me in a WIP branch that changes the default from method="pad" in .replace)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can be a followup

skipna=True,
level=None,
numeric_only=None,
**kwargs,
):
return NDFrame.max(self, axis, skipna, level, numeric_only, **kwargs)

setattr(cls, "max", max)

@doc(
# error: Untyped decorator makes function "min" untyped
@doc( # type: ignore[misc]
_num_doc,
desc="Return the minimum of the values over the requested axis.\n\n"
"If you want the *index* of the minimum, use ``idxmin``. This is "
Expand All @@ -11082,7 +11140,14 @@ def max(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
see_also=_stat_func_see_also,
examples=_min_examples,
)
def min(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
def min(
self,
axis: int | None | lib.NoDefault = lib.no_default,
skipna=True,
level=None,
numeric_only=None,
**kwargs,
):
return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs)

setattr(cls, "min", min)
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1765,3 +1765,25 @@ def test_prod_sum_min_count_mixed_object():
msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'")
with pytest.raises(TypeError, match=msg):
df.sum(axis=0, min_count=1, numeric_only=False)


def test_min_max_axis_none_deprecation():
    # GH#21597 deprecate axis=None defaulting to axis=0 so that we can change it
    # to reducing over all axes.

    df = DataFrame(np.random.randn(4, 4))

    # Explicitly passing axis=None must raise the FutureWarning; for now the
    # result still matches the old axis=0 (column-wise) behavior.
    msg = "scalar (maximum|minimum) over the entire DataFrame"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        res = df.max(axis=None)
    with tm.assert_produces_warning(None):
        # Calling with no axis argument must NOT warn.
        expected = df.max()
    tm.assert_series_equal(res, expected)
    tm.assert_series_equal(res, df.max(axis=0))

    # Same contract for min: warn on explicit axis=None, silent otherwise,
    # result unchanged from axis=0 for now.
    with tm.assert_produces_warning(FutureWarning, match=msg):
        res = df.min(axis=None)
    with tm.assert_produces_warning(None):
        expected = df.min()
    tm.assert_series_equal(res, expected)
    tm.assert_series_equal(res, df.min(axis=0))
23 changes: 19 additions & 4 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def get_stats(group):
assert result.index.names[0] == "C"


def test_basic():
def test_basic(): # TODO: split this test

cats = Categorical(
["a", "a", "a", "b", "b", "b", "c", "c", "c"],
Expand Down Expand Up @@ -142,9 +142,24 @@ def f(x):
df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df["a"]
)
tm.assert_frame_equal(df.groupby(c, observed=False).transform(sum), df[["a"]])
tm.assert_frame_equal(
df.groupby(c, observed=False).transform(lambda xs: np.max(xs)), df[["a"]]
)

gbc = df.groupby(c, observed=False)
with tm.assert_produces_warning(
FutureWarning, match="scalar maximum", check_stacklevel=False
):
# stacklevel is thrown off (I think) because the stack goes through numpy C code
result = gbc.transform(lambda xs: np.max(xs))
tm.assert_frame_equal(result, df[["a"]])

with tm.assert_produces_warning(None):
result2 = gbc.transform(lambda xs: np.max(xs, axis=0))
result3 = gbc.transform(max)
result4 = gbc.transform(np.maximum.reduce)
result5 = gbc.transform(lambda xs: np.maximum.reduce(xs))
tm.assert_frame_equal(result2, df[["a"]], check_dtype=False)
tm.assert_frame_equal(result3, df[["a"]], check_dtype=False)
tm.assert_frame_equal(result4, df[["a"]])
tm.assert_frame_equal(result5, df[["a"]])

# Filter
tm.assert_series_equal(df.a.groupby(c, observed=False).filter(np.all), df["a"])
Expand Down
25 changes: 19 additions & 6 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,33 @@ def test_builtins_apply(keys, f):
df = DataFrame(np.random.randint(1, 50, (1000, 2)), columns=["jim", "joe"])
df["jolie"] = np.random.randn(1000)

gb = df.groupby(keys)

fname = f.__name__
result = df.groupby(keys).apply(f)
result = gb.apply(f)
ngroups = len(df.drop_duplicates(subset=keys))

assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))"
assert result.shape == (ngroups, 3), assert_msg

tm.assert_frame_equal(
result, # numpy's equivalent function
df.groupby(keys).apply(getattr(np, fname)),
)
npfunc = getattr(np, fname) # numpy's equivalent function
if f in [max, min]:
warn = FutureWarning
else:
warn = None
msg = "scalar (maximum|minimum) over the entire DataFrame"
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
# stacklevel can be thrown off because (I think) the stack
# goes through some of numpy's C code.
expected = gb.apply(npfunc)
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(None):
expected2 = gb.apply(lambda x: npfunc(x, axis=0))
tm.assert_frame_equal(result, expected2)

if f != sum:
expected = df.groupby(keys).agg(fname).reset_index()
expected = gb.agg(fname).reset_index()
expected.set_index(keys, inplace=True, drop=False)
tm.assert_frame_equal(result, expected, check_dtype=False)

Expand Down
14 changes: 9 additions & 5 deletions pandas/tests/window/test_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,20 +348,24 @@ def test_expanding_corr_pairwise(frame):

@pytest.mark.parametrize(
"func,static_comp",
[("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)],
[
("sum", np.sum),
("mean", np.mean),
("max", lambda x: np.max(x, axis=0)),
("min", lambda x: np.min(x, axis=0)),
],
ids=["sum", "mean", "max", "min"],
)
def test_expanding_func(func, static_comp, frame_or_series):
data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
result = getattr(data.expanding(min_periods=1, axis=0), func)()
assert isinstance(result, frame_or_series)

expected = static_comp(data[:11])
if frame_or_series is Series:
tm.assert_almost_equal(result[10], static_comp(data[:11]))
tm.assert_almost_equal(result[10], expected)
else:
tm.assert_series_equal(
result.iloc[10], static_comp(data[:11]), check_names=False
)
tm.assert_series_equal(result.iloc[10], expected, check_names=False)


@pytest.mark.parametrize(
Expand Down