Skip to content

BUG: Fix exceptions when Series.interpolate's order parameter is missing or invalid #25246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ Indexing
Missing
^^^^^^^

-
- Fixed misleading exception message in :meth:`Series.missing` if argument ``order`` is required, but omitted (:issue:`10633`, :issue:`24014`).
-
-

Expand Down
30 changes: 12 additions & 18 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,24 +1115,18 @@ def check_int_bool(self, inplace):
fill_value=fill_value,
coerce=coerce,
downcast=downcast)
# try an interp method
try:
m = missing.clean_interp_method(method, **kwargs)
except ValueError:
m = None

if m is not None:
r = check_int_bool(self, inplace)
if r is not None:
return r
return self._interpolate(method=m, index=index, values=values,
axis=axis, limit=limit,
limit_direction=limit_direction,
limit_area=limit_area,
fill_value=fill_value, inplace=inplace,
downcast=downcast, **kwargs)

raise ValueError("invalid method '{0}' to interpolate.".format(method))
# validate the interp method
m = missing.clean_interp_method(method, **kwargs)

r = check_int_bool(self, inplace)
if r is not None:
return r
return self._interpolate(method=m, index=index, values=values,
axis=axis, limit=limit,
limit_direction=limit_direction,
limit_area=limit_area,
fill_value=fill_value, inplace=inplace,
downcast=downcast, **kwargs)

def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
limit=None, fill_value=None, coerce=False,
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,9 +293,10 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None,
bounds_error=bounds_error)
new_y = terp(new_x)
elif method == 'spline':
# GH #10633
if not order:
raise ValueError("order needs to be specified and greater than 0")
# GH #10633, #24014
if isna(order) or (order <= 0):
raise ValueError("order needs to be specified and greater than 0; "
"got order: {}".format(order))
terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs)
new_y = terp(new_x)
else:
Expand Down
86 changes: 52 additions & 34 deletions pandas/tests/series/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,6 +855,13 @@ def test_series_pad_backfill_limit(self):


class TestSeriesInterpolateData():
# This list does not include method 'time'. That method requires a Series
# with a DatetimeIndex, and is tested separately below.
NONTEMPORAL_INTERPOLATE_METHODS = [
'linear', 'index', 'values', 'nearest', 'zero', 'slinear',
'quadratic', 'cubic', 'barycentric', 'krogh', 'polynomial', 'spline',
'piecewise_polynomial', 'from_derivatives', 'pchip', 'akima',
]

def test_interpolate(self, datetime_series, string_series):
ts = Series(np.arange(len(datetime_series), dtype=float),
Expand All @@ -875,12 +882,12 @@ def test_interpolate(self, datetime_series, string_series):
time_interp = ord_ts_copy.interpolate(method='time')
tm.assert_series_equal(time_interp, ord_ts)

# try time interpolation on a non-TimeSeries
# Only raises ValueError if there are NaNs.
non_ts = string_series.copy()
non_ts[0] = np.NaN
msg = ("time-weighted interpolation only works on Series or DataFrames"
" with a DatetimeIndex")
def test_interpolate_time_raises_for_non_timeseries(self):
# When method='time' is used on a non-TimeSeries that contains a null
# value, a ValueError should be raised.
non_ts = Series([0, 1, 2, np.NaN])
msg = ("time-weighted interpolation only works on Series.* "
"with a DatetimeIndex")
with pytest.raises(ValueError, match=msg):
non_ts.interpolate(method='time')

Expand Down Expand Up @@ -1061,21 +1068,36 @@ def test_interp_limit(self):
result = s.interpolate(method='linear', limit=2)
assert_series_equal(result, expected)

# GH 9217, make sure limit is an int and greater than 0
methods = ['linear', 'time', 'index', 'values', 'nearest', 'zero',
'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',
'polynomial', 'spline', 'piecewise_polynomial', None,
'from_derivatives', 'pchip', 'akima']
@pytest.mark.parametrize("method", NONTEMPORAL_INTERPOLATE_METHODS)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use a fixture for this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand where you’re coming from, with the same ”method” parameterization in the two tests. I guess the fixture would be called nontemporal_method_name in this approach?

Since this test uses two parameters (”method” and ”limit”), I think it’s a bit clearer to simply have two parallel pytest forms (in this case, two usages of pytest.mark.parmeterize), to make it clear that the Cartesian product of parameters is being used.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From a maintenance perspective, fixture is much preferred over multiple usages of the parameter.

I guess the fixture would be called nontemporal_method_name in this approach?

Yes, that would be reasonable.

@pytest.mark.parametrize("limit", [-1, 0])
def test_interpolate_invalid_nonpositive_limit(self, method, limit):
# GH 9217: make sure limit is greater than zero
s = pd.Series([1, 2, np.nan, np.nan, 5])
msg = (r"Limit must be greater than 0|"
"time-weighted interpolation only works on Series or"
r" DataFrames with a DatetimeIndex|"
r"invalid method '(polynomial|spline|None)' to interpolate|"
"Limit must be an integer")
for limit in [-1, 0, 1., 2.]:
for method in methods:
with pytest.raises(ValueError, match=msg):
s.interpolate(limit=limit, method=method)
with pytest.raises(ValueError, match="Limit must be greater than 0"):
# Pass order=1 for 'spline' and 'polynomial'.
s.interpolate(limit=limit, method=method, order=1)

@pytest.mark.parametrize("method", NONTEMPORAL_INTERPOLATE_METHODS)
def test_interpolate_invalid_float_limit(self, method):
# GH 9217: make sure limit is an integer.
s = pd.Series([1, 2, np.nan, np.nan, 5])
limit = 2.0
with pytest.raises(ValueError, match="Limit must be an integer"):
# Pass order=1 for 'spline' and 'polynomial'.
s.interpolate(limit=limit, method=method, order=1)

@pytest.mark.parametrize("invalid_method", [None, 'nonexistent_method'])
def test_interp_invalid_method(self, invalid_method):
s = Series([1, 3, np.nan, 12, np.nan, 25])

msg = "method must be one of.* Got '{}' instead".format(invalid_method)
with pytest.raises(ValueError, match=msg):
s.interpolate(method=invalid_method)

# When an invalid method and invalid limit (such as -1) are
# provided, the error message reflects the invalid method.
with pytest.raises(ValueError, match=msg):
s.interpolate(method=invalid_method, limit=-1)

def test_interp_limit_forward(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
Expand Down Expand Up @@ -1276,11 +1298,20 @@ def test_interp_limit_no_nans(self):
@td.skip_if_no_scipy
@pytest.mark.parametrize("method", ['polynomial', 'spline'])
def test_no_order(self, method):
# see GH-10633, GH-24014
s = Series([0, 1, np.nan, 3])
msg = "invalid method '{}' to interpolate".format(method)
msg = "You must specify the order of the spline or polynomial"
with pytest.raises(ValueError, match=msg):
s.interpolate(method=method)

@td.skip_if_no_scipy
@pytest.mark.parametrize('order', [-1, -1.0, 0, 0.0, np.nan])
def test_interpolate_spline_invalid_order(self, order):
s = Series([0, 1, np.nan, 3])
msg = "order needs to be specified and greater than 0"
with pytest.raises(ValueError, match=msg):
s.interpolate(method='spline', order=order)

@td.skip_if_no_scipy
def test_spline(self):
s = Series([1, 2, np.nan, 4, 5, np.nan, 7])
Expand Down Expand Up @@ -1313,19 +1344,6 @@ def test_spline_interpolation(self):
expected1 = s.interpolate(method='spline', order=1)
assert_series_equal(result1, expected1)

@td.skip_if_no_scipy
def test_spline_error(self):
# see gh-10633
s = pd.Series(np.arange(10) ** 2)
s[np.random.randint(0, 9, 3)] = np.nan
msg = "invalid method 'spline' to interpolate"
with pytest.raises(ValueError, match=msg):
s.interpolate(method='spline')

msg = "order needs to be specified and greater than 0"
with pytest.raises(ValueError, match=msg):
s.interpolate(method='spline', order=0)

def test_interp_timedelta64(self):
# GH 6424
df = Series([1, np.nan, 3],
Expand Down