-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Implement cummax and cummin in _accumulate() for ordered Categorical arrays #58360
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
fa74733
269dcfb
a8a1f37
9ec47bf
c224faf
330b60d
e927cda
0122334
4caea9f
098ad64
126bc19
498ad6d
18a86e2
5c5ac3c
7934f74
1abad3e
6be5f8c
babc835
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
""" | ||
categorical_accumulations.py is for accumulation algorithms using a mask-based | ||
approach for missing values. | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
from typing import Callable | ||
|
||
import numpy as np | ||
|
||
|
||
def _cum_func(
    func: Callable,
    values: np.ndarray,
    *,
    skipna: bool = True,
) -> np.ndarray:
    """
    Cumulative min/max over a 1D array of categorical codes.

    NA positions (code ``-1``) in ``values`` are overwritten **in place** with
    a fill value before accumulating: the dtype's minimum for a cumulative
    maximum and the dtype's maximum for a cumulative minimum. The accumulated
    result is a new array in which the NA positions are set back to ``-1``;
    the caller's ``values`` array is left holding the fill values, so pass a
    copy if the original codes are still needed.

    Parameters
    ----------
    func : np.maximum.accumulate, np.minimum.accumulate
        The accumulation to apply.
    values : np.ndarray
        Numpy integer array with the codes and with NAs being -1.
    skipna : bool, default True
        Whether to skip NA. If False, every position from the first NA
        onwards is NA in the result.

    Returns
    -------
    np.ndarray
        The accumulated codes, with NAs being -1.

    Raises
    ------
    NotImplementedError
        If ``func`` is not one of the two supported accumulations.
    """
    # np.iinfo only accepts integer dtypes, which is what codes are expected
    # to be; it runs before the ``func`` check, as in the original ordering.
    dtype_info = np.iinfo(values.dtype.type)
    try:
        fill_value = {
            np.maximum.accumulate: dtype_info.min,
            np.minimum.accumulate: dtype_info.max,
        }[func]
    except KeyError as err:
        raise NotImplementedError(
            f"No accumulation for {func} implemented on Categorical"
        ) from err

    mask = values == -1
    values[mask] = fill_value

    if not skipna:
        # Once an NA has been seen, every later position is NA as well.
        mask = np.maximum.accumulate(mask)

    values = func(values)
    values[mask] = -1

    return values


def cummin(values: np.ndarray, *, skipna: bool = True) -> np.ndarray:
    """
    Cumulative minimum of categorical codes; see ``_cum_func`` for details.
    """
    return _cum_func(np.minimum.accumulate, values, skipna=skipna)


def cummax(values: np.ndarray, *, skipna: bool = True) -> np.ndarray:
    """
    Cumulative maximum of categorical codes; see ``_cum_func`` for details.
    """
    return _cum_func(np.maximum.accumulate, values, skipna=skipna)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
""" | ||
Tests for Ordered Categorical Array cumulative operations. | ||
""" | ||
|
||
import numpy as np | ||
import pytest | ||
|
||
import pandas as pd | ||
import pandas._testing as tm | ||
|
||
|
||
class TestAccumulator:
    """Checks for ``cummax``/``cummin`` via ``Categorical._accumulate``."""

    @pytest.mark.parametrize(
        "method, input, output",
        [
            ("cummax", [1, 2, 1, 2, 3, 3, 2, 1], [1, 2, 2, 2, 3, 3, 3, 3]),
            ("cummin", [3, 2, 3, 2, 1, 1, 2, 3], [3, 2, 2, 2, 1, 1, 1, 1]),
        ],
    )
    def test_cummax_cummin_on_ordered_categorical(self, method, input, output):
        """Accumulate an ordered categorical that has no missing values."""
        # GH#52335
        cat = pd.Categorical(input, ordered=True)
        result = cat._accumulate(method)

        expected = pd.Categorical(output, ordered=True)
        tm.assert_extension_array_equal(result, expected)

    @pytest.mark.parametrize(
        "method, skip, input, output",
        [
            ("cummax", True, [1, np.nan, 2, 1, 3], [1, np.nan, 2, 2, 3]),
            (
                "cummax",
                False,
                [1, np.nan, 2, 1, 3],
                [1, np.nan, np.nan, np.nan, np.nan],
            ),
            ("cummin", True, [3, np.nan, 2, 3, 1], [3, np.nan, 2, 2, 1]),
            (
                "cummin",
                False,
                [3, np.nan, 2, 3, 1],
                [3, np.nan, np.nan, np.nan, np.nan],
            ),
        ],
    )
    def test_cummax_cummin_ordered_categorical_nan(self, skip, method, input, output):
        """Accumulate an ordered categorical containing NaN, with both skipna modes."""
        # GH#52335
        cat = pd.Categorical(input, ordered=True)
        result = cat._accumulate(method, skipna=skip)

        # Categories are spelled out because the skipna=False outputs only
        # contain a single non-missing value.
        expected = pd.Categorical(output, categories=[1, 2, 3], ordered=True)
        tm.assert_extension_array_equal(result, expected)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -170,6 +170,53 @@ def test_cummethods_bool_in_object_dtype(self, method, expected): | |
result = getattr(ser, method)() | ||
tm.assert_series_equal(result, expected) | ||
|
||
@pytest.mark.parametrize(
    "method, order",
    [
        ("cummax", "abc"),
        ("cummin", "cba"),
    ],
)
def test_cummax_cummin_on_ordered_categorical(self, method, order):
    """Series-level cummax/cummin on an ordered categorical without NaN."""
    # GH#52335
    # The category order is reversed for cummin so that both parametrizations
    # share the same expected letters.
    dtype = pd.CategoricalDtype(list(order), ordered=True)
    ser = pd.Series(list("ababcab"), dtype=dtype)

    result = getattr(ser, method)()

    expected = pd.Series(list("abbbccc"), dtype=dtype)
    tm.assert_series_equal(result, expected)
|
||
@pytest.mark.parametrize(
    "method, order",
    [
        ("cummax", "abc"),
        ("cummin", "cba"),
    ],
)
def test_cummax_cummin_ordered_categorical_nan(self, method, order):
    """Series-level cummax/cummin on an ordered categorical containing NaN."""
    # GH#52335
    dtype = pd.CategoricalDtype(list(order), ordered=True)
    ser = pd.Series(["a", np.nan, "b", "a", "c"], dtype=dtype)

    # skipna=True: the NaN stays NaN but does not interrupt the accumulation.
    result = getattr(ser, method)(skipna=True)
    expected = pd.Series(["a", np.nan, "b", "b", "c"], dtype=dtype)
    tm.assert_series_equal(result, expected)

    # skipna=False: everything from the first NaN onwards is NaN.
    result = getattr(ser, method)(skipna=False)
    expected = pd.Series(["a", np.nan, np.nan, np.nan, np.nan], dtype=dtype)
    tm.assert_series_equal(result, expected)
|
||
def test_cumprod_timedelta(self): | ||
# GH#48111 | ||
ser = pd.Series([pd.Timedelta(days=1), pd.Timedelta(days=3)]) | ||
|
Uh oh!
There was an error while loading. Please reload this page.