Skip to content

CLN: Enforce deprecation of method and limit in pct_change methods #57742

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ Removal of prior version deprecations/changes
- Removed ``read_gbq`` and ``DataFrame.to_gbq``. Use ``pandas_gbq.read_gbq`` and ``pandas_gbq.to_gbq`` instead https://pandas-gbq.readthedocs.io/en/latest/api.html (:issue:`55525`)
- Removed ``use_nullable_dtypes`` from :func:`read_parquet` (:issue:`51853`)
- Removed ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`)
- Removed argument ``limit`` from :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`.DataFrameGroupBy.pct_change`, and :meth:`.SeriesGroupBy.pct_change`; the argument ``method`` must be set to ``None`` and will be removed in a future version of pandas (:issue:`53520`)
- Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`)
- Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`)
- Removed option ``mode.use_inf_as_na``, convert inf entries to ``NaN`` before instead (:issue:`51684`)
Expand Down
56 changes: 8 additions & 48 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11120,8 +11120,7 @@ def describe(
def pct_change(
self,
periods: int = 1,
fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default,
limit: int | None | lib.NoDefault = lib.no_default,
fill_method: None = None,
freq=None,
**kwargs,
) -> Self:
Expand All @@ -11143,17 +11142,12 @@ def pct_change(
----------
periods : int, default 1
Periods to shift for forming percent change.
fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad'
How to handle NAs **before** computing percent changes.
fill_method : None
Must be None. This argument will be removed in a future version of pandas.

.. deprecated:: 2.1
All options of `fill_method` are deprecated except `fill_method=None`.

limit : int, default None
The number of consecutive NAs to fill before stopping.

.. deprecated:: 2.1

freq : DateOffset, timedelta, or str, optional
Increment to use from time series API (e.g. 'ME' or BDay()).
**kwargs
Expand Down Expand Up @@ -11260,52 +11254,18 @@ def pct_change(
APPL -0.252395 -0.011860 NaN
"""
# GH#53491
if fill_method not in (lib.no_default, None) or limit is not lib.no_default:
warnings.warn(
"The 'fill_method' keyword being not None and the 'limit' keyword in "
f"{type(self).__name__}.pct_change are deprecated and will be removed "
"in a future version. Either fill in any non-leading NA values prior "
"to calling pct_change or specify 'fill_method=None' to not fill NA "
"values.",
FutureWarning,
stacklevel=find_stack_level(),
)
if fill_method is lib.no_default:
if limit is lib.no_default:
cols = self.items() if self.ndim == 2 else [(None, self)]
for _, col in cols:
if len(col) > 0:
mask = col.isna().values
mask = mask[np.argmax(~mask) :]
if mask.any():
warnings.warn(
"The default fill_method='pad' in "
f"{type(self).__name__}.pct_change is deprecated and "
"will be removed in a future version. Either fill in "
"any non-leading NA values prior to calling pct_change "
"or specify 'fill_method=None' to not fill NA values.",
FutureWarning,
stacklevel=find_stack_level(),
)
break
fill_method = "pad"
if limit is lib.no_default:
limit = None
if fill_method is not None:
raise ValueError(f"fill_method must be None; got {fill_method=}.")

axis = self._get_axis_number(kwargs.pop("axis", "index"))
if fill_method is None:
data = self
else:
data = self._pad_or_backfill(fill_method, axis=axis, limit=limit)

shifted = data.shift(periods=periods, freq=freq, axis=axis, **kwargs)
shifted = self.shift(periods=periods, freq=freq, axis=axis, **kwargs)
# Unsupported left operand type for / ("Self")
rs = data / shifted - 1 # type: ignore[operator]
rs = self / shifted - 1 # type: ignore[operator]
if freq is not None:
# Shift method is implemented differently when freq is not None
# We want to restore the original index
rs = rs.loc[~rs.index.duplicated()]
rs = rs.reindex_like(data)
rs = rs.reindex_like(self)
return rs.__finalize__(self, method="pct_change")

@final
Expand Down
57 changes: 11 additions & 46 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ class providing the base-class of operations.
AnyArrayLike,
ArrayLike,
DtypeObj,
FillnaOptions,
IndexLabel,
IntervalClosedType,
NDFrameT,
Expand Down Expand Up @@ -5147,8 +5146,7 @@ def diff(
def pct_change(
self,
periods: int = 1,
fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default,
limit: int | None | lib.NoDefault = lib.no_default,
fill_method: None = None,
freq=None,
):
"""
Expand All @@ -5161,19 +5159,11 @@ def pct_change(
a period of 1 means adjacent elements are compared, whereas a period
of 2 compares every other element.

fill_method : FillnaOptions or None, default None
Specifies how to handle missing values after the initial shift
operation necessary for percentage change calculation. Users are
encouraged to handle missing values manually in future versions.
Valid options are:
- A FillnaOptions value ('ffill', 'bfill') for forward or backward filling.
- None to avoid filling.
Note: Usage is discouraged due to impending deprecation.
fill_method : None
Must be None. This argument will be removed in a future version of pandas.

limit : int or None, default None
The maximum number of consecutive NA values to fill, based on the chosen
`fill_method`. Address NaN values prior to using `pct_change` as this
parameter is nearing deprecation.
.. deprecated:: 2.1
All options of `fill_method` are deprecated except `fill_method=None`.

freq : str, pandas offset object, or None, default None
The frequency increment for time series data (e.g., 'M' for month-end).
Expand Down Expand Up @@ -5227,49 +5217,24 @@ def pct_change(
goldfish 0.2 0.125
"""
# GH#53491
if fill_method not in (lib.no_default, None) or limit is not lib.no_default:
warnings.warn(
"The 'fill_method' keyword being not None and the 'limit' keyword in "
f"{type(self).__name__}.pct_change are deprecated and will be removed "
"in a future version. Either fill in any non-leading NA values prior "
"to calling pct_change or specify 'fill_method=None' to not fill NA "
"values.",
FutureWarning,
stacklevel=find_stack_level(),
)
if fill_method is lib.no_default:
if limit is lib.no_default and any(
grp.isna().values.any() for _, grp in self
):
warnings.warn(
"The default fill_method='ffill' in "
f"{type(self).__name__}.pct_change is deprecated and will "
"be removed in a future version. Either fill in any "
"non-leading NA values prior to calling pct_change or "
"specify 'fill_method=None' to not fill NA values.",
FutureWarning,
stacklevel=find_stack_level(),
)
fill_method = "ffill"
if limit is lib.no_default:
limit = None
if fill_method is not None:
raise ValueError(f"fill_method must be None; got {fill_method=}.")

# TODO(GH#23918): Remove this conditional for SeriesGroupBy when
# GH#23918 is fixed
if freq is not None:
f = lambda x: x.pct_change(
periods=periods,
fill_method=fill_method,
limit=limit,
freq=freq,
axis=0,
)
return self._python_apply_general(f, self._selected_obj, is_transform=True)

if fill_method is None: # GH30463
fill_method = "ffill"
limit = 0
filled = getattr(self, fill_method)(limit=limit)
op = "ffill"
else:
op = fill_method
filled = getattr(self, op)(limit=0)
fill_grp = filled.groupby(self._grouper.codes, group_keys=self.group_keys)
shifted = fill_grp.shift(periods=periods, freq=freq)
return (filled / shifted) - 1
Expand Down
119 changes: 32 additions & 87 deletions pandas/tests/frame/methods/test_pct_change.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,17 @@

class TestDataFramePctChange:
@pytest.mark.parametrize(
"periods, fill_method, limit, exp",
"periods, exp",
[
(1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]),
(1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]),
(1, "bfill", None, [np.nan, 0, 0, 1, 1, 1.5, np.nan, np.nan]),
(1, "bfill", 1, [np.nan, np.nan, 0, 1, 1, 1.5, np.nan, np.nan]),
(-1, "ffill", None, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, 0, np.nan]),
(-1, "ffill", 1, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, np.nan, np.nan]),
(-1, "bfill", None, [0, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]),
(-1, "bfill", 1, [np.nan, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]),
(1, [np.nan, np.nan, np.nan, 1, 1, 1.5, np.nan, np.nan]),
(-1, [np.nan, np.nan, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]),
],
)
def test_pct_change_with_nas(
self, periods, fill_method, limit, exp, frame_or_series
):
def test_pct_change_with_nas(self, periods, exp, frame_or_series):
vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
obj = frame_or_series(vals)

msg = (
"The 'fill_method' keyword being not None and the 'limit' keyword in "
f"{type(obj).__name__}.pct_change are deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
res = obj.pct_change(periods=periods, fill_method=fill_method, limit=limit)
res = obj.pct_change(periods=periods)
tm.assert_equal(res, frame_or_series(exp))

def test_pct_change_numeric(self):
Expand All @@ -45,124 +32,82 @@ def test_pct_change_numeric(self):
pnl.iat[1, 1] = np.nan
pnl.iat[2, 3] = 60

msg = (
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"DataFrame.pct_change are deprecated"
)

for axis in range(2):
expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1

with tm.assert_produces_warning(FutureWarning, match=msg):
result = pnl.pct_change(axis=axis, fill_method="pad")
expected = pnl / pnl.shift(axis=axis) - 1
result = pnl.pct_change(axis=axis)
tm.assert_frame_equal(result, expected)

def test_pct_change(self, datetime_frame):
msg = (
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"DataFrame.pct_change are deprecated"
)

rs = datetime_frame.pct_change(fill_method=None)
rs = datetime_frame.pct_change()
tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1)

rs = datetime_frame.pct_change(2)
filled = datetime_frame.ffill()
tm.assert_frame_equal(rs, filled / filled.shift(2) - 1)

with tm.assert_produces_warning(FutureWarning, match=msg):
rs = datetime_frame.pct_change(fill_method="bfill", limit=1)
filled = datetime_frame.bfill(limit=1)
tm.assert_frame_equal(rs, filled / filled.shift(1) - 1)
rs = datetime_frame.pct_change()
tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1)

rs = datetime_frame.pct_change(freq="5D")
filled = datetime_frame.ffill()
tm.assert_frame_equal(
rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled)
rs,
(datetime_frame / datetime_frame.shift(freq="5D") - 1).reindex_like(
datetime_frame
),
)

def test_pct_change_shift_over_nas(self):
s = Series([1.0, 1.5, np.nan, 2.5, 3.0])

df = DataFrame({"a": s, "b": s})

msg = "The default fill_method='pad' in DataFrame.pct_change is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
chg = df.pct_change()

expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2])
chg = df.pct_change()
expected = Series([np.nan, 0.5, np.nan, np.nan, 0.2])
edf = DataFrame({"a": expected, "b": expected})
tm.assert_frame_equal(chg, edf)

@pytest.mark.parametrize(
"freq, periods, fill_method, limit",
"freq, periods",
[
("5B", 5, None, None),
("3B", 3, None, None),
("3B", 3, "bfill", None),
("7B", 7, "pad", 1),
("7B", 7, "bfill", 3),
("14B", 14, None, None),
("5B", 5),
("3B", 3),
("14B", 14),
],
)
def test_pct_change_periods_freq(
self, datetime_frame, freq, periods, fill_method, limit
self,
datetime_frame,
freq,
periods,
):
msg = (
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"DataFrame.pct_change are deprecated"
)

# GH#7292
with tm.assert_produces_warning(FutureWarning, match=msg):
rs_freq = datetime_frame.pct_change(
freq=freq, fill_method=fill_method, limit=limit
)
with tm.assert_produces_warning(FutureWarning, match=msg):
rs_periods = datetime_frame.pct_change(
periods, fill_method=fill_method, limit=limit
)
rs_freq = datetime_frame.pct_change(freq=freq)
rs_periods = datetime_frame.pct_change(periods)
tm.assert_frame_equal(rs_freq, rs_periods)

empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns)
with tm.assert_produces_warning(FutureWarning, match=msg):
rs_freq = empty_ts.pct_change(
freq=freq, fill_method=fill_method, limit=limit
)
with tm.assert_produces_warning(FutureWarning, match=msg):
rs_periods = empty_ts.pct_change(
periods, fill_method=fill_method, limit=limit
)
rs_freq = empty_ts.pct_change(freq=freq)
rs_periods = empty_ts.pct_change(periods)
tm.assert_frame_equal(rs_freq, rs_periods)


@pytest.mark.parametrize("fill_method", ["pad", "ffill", None])
def test_pct_change_with_duplicated_indices(fill_method):
def test_pct_change_with_duplicated_indices():
# GH30463
data = DataFrame(
{0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3
)

warn = None if fill_method is None else FutureWarning
msg = (
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"DataFrame.pct_change are deprecated"
)
with tm.assert_produces_warning(warn, match=msg):
result = data.pct_change(fill_method=fill_method)
result = data.pct_change()

if fill_method is None:
second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0]
else:
second_column = [np.nan, np.inf, 0.0, 2.0, 2.0, 1.0]
second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0]
expected = DataFrame(
{0: [np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], 1: second_column},
index=["a", "b"] * 3,
)
tm.assert_frame_equal(result, expected)


def test_pct_change_none_beginning_no_warning():
def test_pct_change_none_beginning():
# GH#54481
df = DataFrame(
[
Expand Down
Loading