Skip to content

DEPR/ENH: support axis=None in min/max #45072

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 28, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,10 +521,12 @@ def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwar

if "axis" not in kwargs:
# For DataFrame reductions we don't want the default axis=0
# FIXME: DataFrame.min ignores axis=None
# FIXME: np.minimum.reduce(df) gets here bc axis is not in kwargs,
# but np.minimum.reduce(df.values) behaves as if axis=0
kwargs["axis"] = None
# Note: np.min is not a ufunc, but uses array_function_dispatch,
# so calls DataFrame.min (without ever getting here) with the np.min
# default of axis=None, which DataFrame.min catches and changes to axis=0.
# np.minimum.reduce(df) gets here because axis is not in kwargs,
# so we set axis=0 to match the behavior of np.minimum.reduce(df.values)
kwargs["axis"] = 0

# By default, numpy's reductions do not skip NaNs, so we have to
# pass skipna=False
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,14 @@ def require_length_match(data, index: Index):
)


_builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min}
# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
# whereas np.min and np.max (which directly call obj.min and obj.max)
# default to axis=None.
_builtin_table = {
builtins.sum: np.sum,
builtins.max: np.maximum.reduce,
builtins.min: np.minimum.reduce,
}

_cython_table = {
builtins.sum: "sum",
Expand Down
81 changes: 73 additions & 8 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10546,26 +10546,74 @@ def _stat_function(

def min(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
level: Level | None = None,
numeric_only: bool_t | None = None,
**kwargs,
):
if axis is None and level is None and self.ndim > 1:
# user must have explicitly passed axis=None
# GH#21597
warnings.warn(
"In a future version, DataFrame.min(axis=None) will return a scalar "
"minimum over the entire DataFrame. To retain the old behavior, "
"use 'frame.min(axis=0)' or just 'frame.min()'",
FutureWarning,
stacklevel=find_stack_level(),
)

if axis is lib.no_default:
# Until we can implement axis=None for all _stat_function methods,
# we change back to axis=None here.
axis = None
return self._stat_function(
"min", nanops.nanmin, axis, skipna, level, numeric_only, **kwargs
"min",
nanops.nanmin,
# error: Argument 3 to "_stat_function" of "NDFrame" has incompatible
# type "Union[Union[str, int], None, NoDefault]"; expected
# "Optional[Union[str, int]]"
axis, # type: ignore[arg-type]
skipna,
level,
numeric_only,
**kwargs,
)

def max(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
level: Level | None = None,
numeric_only: bool_t | None = None,
**kwargs,
):
if axis is None and level is None and self.ndim > 1:
# user must have explicitly passed axis=None
# GH#21597
warnings.warn(
"In a future version, DataFrame.max(axis=None) will return a scalar "
"maximum over the entire DataFrame. To retain the old behavior, "
"use 'frame.max(axis=0)' or just 'frame.max()'",
FutureWarning,
stacklevel=find_stack_level(),
)

if axis is lib.no_default:
# Until we can implement axis=None for all _stat_function methods,
# we change back to axis=None here.
axis = None
return self._stat_function(
"max", nanops.nanmax, axis, skipna, level, numeric_only, **kwargs
"max",
nanops.nanmax,
# error: Argument 3 to "_stat_function" of "NDFrame" has incompatible
# type "Union[Union[str, int], None, NoDefault]"; expected
# "Optional[Union[str, int]]"
axis, # type: ignore[arg-type]
skipna,
level,
numeric_only,
**kwargs,
)

def mean(
Expand Down Expand Up @@ -10657,6 +10705,7 @@ def _min_count_stat_function(
min_count=min_count,
numeric_only=numeric_only,
)

return self._reduce(
func,
name=name,
Expand Down Expand Up @@ -11053,7 +11102,8 @@ def median(

setattr(cls, "median", median)

@doc(
# error: Untyped decorator makes function "max" untyped
@doc( # type: ignore[misc]
_num_doc,
desc="Return the maximum of the values over the requested axis.\n\n"
"If you want the *index* of the maximum, use ``idxmax``. This is "
Expand All @@ -11065,12 +11115,20 @@ def median(
see_also=_stat_func_see_also,
examples=_max_examples,
)
def max(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
def max(
self,
axis: int | None | lib.NoDefault = lib.no_default,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@simonjayhawkins is this going to render in the docs in a way we don't want?

(similar question bugs me in a WIP branch that changes the default from method="pad" in .replace)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can be a followup

skipna=True,
level=None,
numeric_only=None,
**kwargs,
):
return NDFrame.max(self, axis, skipna, level, numeric_only, **kwargs)

setattr(cls, "max", max)

@doc(
# error: Untyped decorator makes function "min" untyped
@doc( # type: ignore[misc]
_num_doc,
desc="Return the minimum of the values over the requested axis.\n\n"
"If you want the *index* of the minimum, use ``idxmin``. This is "
Expand All @@ -11082,7 +11140,14 @@ def max(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
see_also=_stat_func_see_also,
examples=_min_examples,
)
def min(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
def min(
self,
axis: int | None | lib.NoDefault = lib.no_default,
skipna=True,
level=None,
numeric_only=None,
**kwargs,
):
return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs)

setattr(cls, "min", min)
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1765,3 +1765,25 @@ def test_prod_sum_min_count_mixed_object():
msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'")
with pytest.raises(TypeError, match=msg):
df.sum(axis=0, min_count=1, numeric_only=False)


def test_min_max_axis_none_deprecation():
    # GH#21597 deprecate axis=None defaulting to axis=0 so that we can change it
    # to reducing over all axes.

    df = DataFrame(np.random.randn(4, 4))

    # Explicitly passing axis=None must raise the FutureWarning; for now the
    # result still matches the old axis=0 (column-wise) behavior.
    msg = "scalar (maximum|minimum) over the entire DataFrame"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        res = df.max(axis=None)
    with tm.assert_produces_warning(None):
        # Calling with no axis argument must NOT warn.
        expected = df.max()
    tm.assert_series_equal(res, expected)
    tm.assert_series_equal(res, df.max(axis=0))

    # Same contract for min: warn on explicit axis=None, silent otherwise,
    # result unchanged from axis=0 for now.
    with tm.assert_produces_warning(FutureWarning, match=msg):
        res = df.min(axis=None)
    with tm.assert_produces_warning(None):
        expected = df.min()
    tm.assert_series_equal(res, expected)
    tm.assert_series_equal(res, df.min(axis=0))
23 changes: 19 additions & 4 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def get_stats(group):
assert result.index.names[0] == "C"


def test_basic():
def test_basic(): # TODO: split this test

cats = Categorical(
["a", "a", "a", "b", "b", "b", "c", "c", "c"],
Expand Down Expand Up @@ -142,9 +142,24 @@ def f(x):
df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df["a"]
)
tm.assert_frame_equal(df.groupby(c, observed=False).transform(sum), df[["a"]])
tm.assert_frame_equal(
df.groupby(c, observed=False).transform(lambda xs: np.max(xs)), df[["a"]]
)

gbc = df.groupby(c, observed=False)
with tm.assert_produces_warning(
FutureWarning, match="scalar maximum", check_stacklevel=False
):
# stacklevel is thrown off (I think) because the stack goes through numpy C code
result = gbc.transform(lambda xs: np.max(xs))
tm.assert_frame_equal(result, df[["a"]])

with tm.assert_produces_warning(None):
result2 = gbc.transform(lambda xs: np.max(xs, axis=0))
result3 = gbc.transform(max)
result4 = gbc.transform(np.maximum.reduce)
result5 = gbc.transform(lambda xs: np.maximum.reduce(xs))
tm.assert_frame_equal(result2, df[["a"]], check_dtype=False)
tm.assert_frame_equal(result3, df[["a"]], check_dtype=False)
tm.assert_frame_equal(result4, df[["a"]])
tm.assert_frame_equal(result5, df[["a"]])

# Filter
tm.assert_series_equal(df.a.groupby(c, observed=False).filter(np.all), df["a"])
Expand Down
25 changes: 19 additions & 6 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,33 @@ def test_builtins_apply(keys, f):
df = DataFrame(np.random.randint(1, 50, (1000, 2)), columns=["jim", "joe"])
df["jolie"] = np.random.randn(1000)

gb = df.groupby(keys)

fname = f.__name__
result = df.groupby(keys).apply(f)
result = gb.apply(f)
ngroups = len(df.drop_duplicates(subset=keys))

assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))"
assert result.shape == (ngroups, 3), assert_msg

tm.assert_frame_equal(
result, # numpy's equivalent function
df.groupby(keys).apply(getattr(np, fname)),
)
npfunc = getattr(np, fname) # numpy's equivalent function
if f in [max, min]:
warn = FutureWarning
else:
warn = None
msg = "scalar (maximum|minimum) over the entire DataFrame"
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
# stacklevel can be thrown off because (I think) the stack
# goes through some of numpy's C code.
expected = gb.apply(npfunc)
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(None):
expected2 = gb.apply(lambda x: npfunc(x, axis=0))
tm.assert_frame_equal(result, expected2)

if f != sum:
expected = df.groupby(keys).agg(fname).reset_index()
expected = gb.agg(fname).reset_index()
expected.set_index(keys, inplace=True, drop=False)
tm.assert_frame_equal(result, expected, check_dtype=False)

Expand Down
14 changes: 9 additions & 5 deletions pandas/tests/window/test_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,20 +348,24 @@ def test_expanding_corr_pairwise(frame):

@pytest.mark.parametrize(
"func,static_comp",
[("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)],
[
("sum", np.sum),
("mean", np.mean),
("max", lambda x: np.max(x, axis=0)),
("min", lambda x: np.min(x, axis=0)),
],
ids=["sum", "mean", "max", "min"],
)
def test_expanding_func(func, static_comp, frame_or_series):
data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
result = getattr(data.expanding(min_periods=1, axis=0), func)()
assert isinstance(result, frame_or_series)

expected = static_comp(data[:11])
if frame_or_series is Series:
tm.assert_almost_equal(result[10], static_comp(data[:11]))
tm.assert_almost_equal(result[10], expected)
else:
tm.assert_series_equal(
result.iloc[10], static_comp(data[:11]), check_names=False
)
tm.assert_series_equal(result.iloc[10], expected, check_names=False)


@pytest.mark.parametrize(
Expand Down