pandas-dev · mroeschke · Apr 23, 2024 · Apr 14, 2024 · Apr 16, 2024 · Apr 16, 2024
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -41,6 +41,7 @@ Other enhancements
 - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
 - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
 - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
+- :class:`Categorical` now implements the :meth:`ExtensionArray._accumulate` operations ``cummax`` and ``cummin`` for ordered categoricals (:issue:`52335`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:

diff --git a/pandas/core/array_algos/categorical_accumulations.py b/pandas/core/array_algos/categorical_accumulations.py
@@ -0,0 +1,60 @@
+"""
+categorical_accumulations.py is for accumulation algorithms using a mask-based
+approach for missing values.
+"""
+
+from __future__ import annotations
+
+from typing import Callable
+
+import numpy as np
+
+
+def _cum_func(
+    func: Callable,
+    values: np.ndarray,
+    *,
+    skipna: bool = True,
+) -> np.ndarray:
+    """
+    Accumulations for 1D categorical codes.
+
+    NA entries of ``values`` are overwritten in place with the fill value.
+
+    Parameters
+    ----------
+    func : np.maximum.accumulate, np.minimum.accumulate
+    values : np.ndarray
+        Numpy integer array with the values and with NAs being -1.
+    skipna : bool, default True
+        Whether to skip NA. If False, all entries after the first NA are NA.
+    """
+    dtype_info = np.iinfo(values.dtype.type)
+    try:
+        fill_value = {
+            np.maximum.accumulate: dtype_info.min,
+            np.minimum.accumulate: dtype_info.max,
+        }[func]
+    except KeyError as err:
+        raise NotImplementedError(
+            f"No accumulation for {func} implemented on Categorical"
+        ) from err
+
+    # fill NAs with the dtype's extreme value so they never win the comparison
+    mask = values == -1
+    values[mask] = fill_value
+    if not skipna:
+        # once an NA has been seen, every later position is NA as well
+        mask = np.maximum.accumulate(mask)
+
+    values = func(values)
+    values[mask] = -1
+    return values
+
+
+def cummin(values: np.ndarray, *, skipna: bool = True) -> np.ndarray:
+    return _cum_func(func=np.minimum.accumulate, values=values, skipna=skipna)
+
+
+def cummax(values: np.ndarray, *, skipna: bool = True) -> np.ndarray:
+    return _cum_func(func=np.maximum.accumulate, values=values, skipna=skipna)
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -71,6 +71,7 @@
     factorize,
     take_nd,
 )
+from pandas.core.array_algos import categorical_accumulations
 from pandas.core.arrays._mixins import (
     NDArrayBackedExtensionArray,
     ravel_compat,
@@ -2508,6 +2509,19 @@ def equals(self, other: object) -> bool:
             return np.array_equal(self._codes, other._codes)
         return False
 
+    def _accumulate(self, name: str, skipna: bool = True, **kwargs) -> Self:
+        # cummin and cummax are the only accumulations handled here
+        if name not in {"cummin", "cummax"}:
+            raise TypeError(f"Accumulation {name} not supported for {type(self)}")
+        # NOTE(review): presumably raises for unordered categoricals - confirm
+        self.check_for_ordered(name)
+
+        # the helper named ``name`` overwrites NA codes in its input: pass a copy
+        accumulate = getattr(categorical_accumulations, name)
+        new_codes = accumulate(self.codes.copy(), skipna=skipna, **kwargs)
+
+        return self._simple_new(new_codes, dtype=self._dtype)
+
     @classmethod
     def _concat_same_type(cls, to_concat: Sequence[Self], axis: AxisInt = 0) -> Self:
         from pandas.core.dtypes.concat import union_categoricals

diff --git a/pandas/tests/arrays/categorical/test_cumulative.py b/pandas/tests/arrays/categorical/test_cumulative.py
@@ -0,0 +1,49 @@
+"""
+Tests for Ordered Categorical Array cumulative operations.
+"""
+
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+class TestAccumulator:
+    @pytest.mark.parametrize(
+        "method, data, expected",
+        [
+            ["cummax", [1, 2, 1, 2, 3, 3, 2, 1], [1, 2, 2, 2, 3, 3, 3, 3]],
+            ["cummin", [3, 2, 3, 2, 1, 1, 2, 3], [3, 2, 2, 2, 1, 1, 1, 1]],
+        ],
+    )
+    def test_cummax_cummin_on_ordered_categorical(self, method, data, expected):
+        # GH#52335 - no NAs: running max/min over the ordered categories
+        result = pd.Categorical(data, ordered=True)._accumulate(method)
+        tm.assert_extension_array_equal(result, pd.Categorical(expected, ordered=True))
+
+    @pytest.mark.parametrize(
+        "method, skip, data, expected",
+        [
+            ["cummax", True, [1, np.nan, 2, 1, 3], [1, np.nan, 2, 2, 3]],
+            [
+                "cummax",
+                False,
+                [1, np.nan, 2, 1, 3],
+                [1, np.nan, np.nan, np.nan, np.nan],
+            ],
+            ["cummin", True, [3, np.nan, 2, 3, 1], [3, np.nan, 2, 2, 1]],
+            [
+                "cummin",
+                False,
+                [3, np.nan, 2, 3, 1],
+                [3, np.nan, np.nan, np.nan, np.nan],
+            ],
+        ],
+    )
+    def test_cummax_cummin_ordered_categorical_nan(self, method, skip, data, expected):
+        # GH#52335 - skipna=True keeps the NA in place, skipna=False propagates it
+        result = pd.Categorical(data, ordered=True)._accumulate(method, skipna=skip)
+        tm.assert_extension_array_equal(
+            result, pd.Categorical(expected, categories=[1, 2, 3], ordered=True)
+        )
diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py
@@ -170,6 +170,53 @@ def test_cummethods_bool_in_object_dtype(self, method, expected):
         result = getattr(ser, method)()
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize(
+        "method, order",
+        [
+            ["cummax", "abc"],
+            ["cummin", "cba"],
+        ],
+    )
+    def test_cummax_cummin_on_ordered_categorical(self, method, order):
+        # GH#52335
+        # with "cba" the order is reversed, so cummin matches cummax on "abc"
+        dtype = pd.CategoricalDtype(list(order), ordered=True)
+        ser = pd.Series(list("ababcab"), dtype=dtype)
+        result = getattr(ser, method)()
+        expected = pd.Series(list("abbbccc"), dtype=dtype)
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "method, order",
+        [
+            ["cummax", "abc"],
+            ["cummin", "cba"],
+        ],
+    )
+    def test_cummax_cummin_ordered_categorical_nan(self, method, order):
+        # GH#52335
+        # The category order is reversed for cummin ("c" < "b" < "a"), so both
+        # methods are expected to produce the same values below.
+        dtype = pd.CategoricalDtype(list(order), ordered=True)
+        ser = pd.Series(["a", np.nan, "b", "a", "c"], dtype=dtype)
+        accumulate = getattr(ser, method)
+
+        # skipna=True: the NA stays where it is and accumulation resumes after it
+        result = accumulate(skipna=True)
+        expected = pd.Series(
+            ["a", np.nan, "b", "b", "c"],
+            dtype=dtype,
+        )
+        tm.assert_series_equal(result, expected)
+
+        # skipna=False: every position from the first NA onwards is NA
+        result = accumulate(skipna=False)
+        expected = pd.Series(
+            ["a", np.nan, np.nan, np.nan, np.nan],
+            dtype=dtype,
+        )
+        tm.assert_series_equal(result, expected)
+
     def test_cumprod_timedelta(self):
         # GH#48111
         ser = pd.Series([pd.Timedelta(days=1), pd.Timedelta(days=3)])