Skip to content

DEPR: inconsistent series[i:j] slicing with Int64Index GH#45162 #45324

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,50 @@ Other API changes

Deprecations
~~~~~~~~~~~~

.. _whatsnew_150.deprecations.int_slicing_series:

In a future version, integer slicing on a :class:`Series` with an :class:`Int64Index` or :class:`RangeIndex` will be treated as *label-based*, not positional. This will make the behavior consistent with other :meth:`Series.__getitem__` and :meth:`Series.__setitem__` behaviors (:issue:`45162`).

For example:

.. ipython:: python

    ser = pd.Series([1, 2, 3, 4, 5], index=[2, 3, 5, 7, 11])

In the old behavior, ``ser[2:4]`` treats the slice as positional:

*Old behavior*:

.. code-block:: ipython

    In [3]: ser[2:4]
    Out[3]:
    5    3
    7    4
    dtype: int64

In a future version, this will be treated as label-based:

*Future behavior*:

.. code-block:: ipython

    In [4]: ser.loc[2:4]
    Out[4]:
    2    1
    3    2
    dtype: int64

To retain the old behavior, use ``series.iloc[i:j]``. To get the future behavior,
use ``series.loc[i:j]``.

Slicing on a :class:`DataFrame` will not be affected.

.. _whatsnew_150.deprecations.other:

Other Deprecations
^^^^^^^^^^^^^^^^^^
- Deprecated the keyword ``line_terminator`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead; this is for consistency with :func:`read_csv` and the standard library 'csv' module (:issue:`9568`)
-

Expand Down
43 changes: 42 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
ABCDatetimeIndex,
ABCMultiIndex,
ABCPeriodIndex,
ABCRangeIndex,
ABCSeries,
ABCTimedeltaIndex,
)
Expand Down Expand Up @@ -3989,7 +3990,7 @@ def _validate_positional_slice(self, key: slice) -> None:
self._validate_indexer("positional", key.stop, "iloc")
self._validate_indexer("positional", key.step, "iloc")

def _convert_slice_indexer(self, key: slice, kind: str_t):
def _convert_slice_indexer(self, key: slice, kind: str_t, is_frame: bool = False):
"""
Convert a slice indexer.

Expand All @@ -4000,6 +4001,9 @@ def _convert_slice_indexer(self, key: slice, kind: str_t):
----------
key : label of the slice bound
kind : {'loc', 'getitem'}
is_frame : bool, default False
Whether this is a slice called on DataFrame.__getitem__
as opposed to Series.__getitem__
"""
assert kind in ["loc", "getitem"], kind

Expand All @@ -4020,7 +4024,44 @@ def is_int(v):
called from the getitem slicers, validate that we are in fact
integers
"""
if self.is_integer():
if is_frame:
# unambiguously positional, no deprecation
pass
elif start is None and stop is None:
# label-based vs positional is irrelevant
pass
elif isinstance(self, ABCRangeIndex) and self._range == range(
len(self)
):
# In this case there is no difference between label-based
# and positional, so nothing will change.
pass
elif (
self.dtype.kind in ["i", "u"]
and self._is_strictly_monotonic_increasing
and len(self) > 0
and self[0] == 0
and self[-1] == len(self) - 1
):
# We are range-like, e.g. created with Index(np.arange(N))
pass
elif not is_index_slice:
# we're going to raise, so don't bother warning, e.g.
# test_integer_positional_indexing
pass
else:
warnings.warn(
"The behavior of `series[i:j]` with an integer-dtype index "
"is deprecated. In a future version, this will be treated "
"as *label-based* indexing, consistent with e.g. `series[i]` "
"lookups. To retain the old behavior, use `series.iloc[i:j]`. "
"To get the future behavior, use `series.loc[i:j]`.",
FutureWarning,
stacklevel=find_stack_level(),
)
if self.is_integer() or is_index_slice:
# Note: these checks are redundant if we know is_index_slice
self._validate_indexer("slice", key.start, "getitem")
self._validate_indexer("slice", key.stop, "getitem")
self._validate_indexer("slice", key.step, "getitem")
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,7 @@ def _index_as_unique(self) -> bool:
"cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"
)

def _convert_slice_indexer(self, key: slice, kind: str):
def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
if not (key.step is None or key.step == 1):
# GH#31658 if label-based, we require step == 1,
# if positional, we disallow float start/stop
Expand All @@ -766,7 +766,7 @@ def _convert_slice_indexer(self, key: slice, kind: str):
# i.e. this cannot be interpreted as a positional slice
raise ValueError(msg)

return super()._convert_slice_indexer(key, kind)
return super()._convert_slice_indexer(key, kind, is_frame=is_frame)

@cache_readonly
def _should_fallback_to_positional(self) -> bool:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,15 +268,15 @@ def _should_fallback_to_positional(self) -> bool:
return False

@doc(Index._convert_slice_indexer)
def _convert_slice_indexer(self, key: slice, kind: str):
def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
if is_float_dtype(self.dtype):
assert kind in ["loc", "getitem"]

# We always treat __getitem__ slicing as label-based
# translate to locations
return self.slice_indexer(key.start, key.stop, key.step)

return super()._convert_slice_indexer(key, kind=kind)
return super()._convert_slice_indexer(key, kind=kind, is_frame=is_frame)

@doc(Index._maybe_cast_slice_bound)
def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2313,7 +2313,7 @@ def convert_to_index_sliceable(obj: DataFrame, key):
"""
idx = obj.index
if isinstance(key, slice):
return idx._convert_slice_indexer(key, kind="getitem")
return idx._convert_slice_indexer(key, kind="getitem", is_frame=True)

elif isinstance(key, str):

Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,8 @@ def test_get(self, data):
expected = s.iloc[[2, 3]]
self.assert_series_equal(result, expected)

result = s.get(slice(2))
with tm.assert_produces_warning(FutureWarning, match="label-based"):
result = s.get(slice(2))
expected = s.iloc[[0, 1]]
self.assert_series_equal(result, expected)

Expand All @@ -336,7 +337,9 @@ def test_get(self, data):

# GH 21257
s = pd.Series(data)
s2 = s[::2]
with tm.assert_produces_warning(None):
# GH#45324 make sure we aren't giving a spurious FutureWarning
s2 = s[::2]
assert s2.get(1) is None

def test_take_sequence(self, data):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,7 @@ def test_iloc_row_slice_view(self, using_array_manager):
exp_col = original[2].copy()
# TODO(ArrayManager) verify it is expected that the original didn't change
if not using_array_manager:
exp_col[4:8] = 0.0
exp_col._values[4:8] = 0.0
tm.assert_series_equal(df[2], exp_col)

def test_iloc_col(self):
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,11 +367,11 @@ def test_apply_frame_not_as_index_column_name(df):

def test_apply_frame_concat_series():
def trans(group):
return group.groupby("B")["C"].sum().sort_values()[:2]
return group.groupby("B")["C"].sum().sort_values().iloc[:2]

def trans2(group):
grouped = group.groupby(df.reindex(group.index)["B"])
return grouped.sum().sort_values()[:2]
return grouped.sum().sort_values().iloc[:2]

df = DataFrame(
{
Expand Down Expand Up @@ -409,7 +409,7 @@ def test_apply_chunk_view():
# Low level tinkering could be unsafe, make sure not
df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)})

result = df.groupby("key", group_keys=False).apply(lambda x: x[:2])
result = df.groupby("key", group_keys=False).apply(lambda x: x.iloc[:2])
expected = df.take([0, 1, 3, 4, 6, 7])
tm.assert_frame_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/numeric/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,7 @@ def test_uint_index_does_not_convert_to_float64(box):
)
tm.assert_index_equal(result.index, expected)

tm.assert_equal(result, series[:3])
tm.assert_equal(result, series.iloc[:3])


def test_float64_index_equals():
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexing/test_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,8 @@ def test_integer_positional_indexing(self, idx):
"""
s = Series(range(2, 6), index=range(2, 6))

result = s[2:4]
with tm.assert_produces_warning(FutureWarning, match="label-based"):
result = s[2:4]
expected = s.iloc[2:4]
tm.assert_series_equal(result, expected)

Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1386,8 +1386,10 @@ def test_iloc(self):
tm.assert_series_equal(result, expected)

# test slice is a view
result[:] = 0
assert (ser[1:3] == 0).all()
with tm.assert_produces_warning(None):
# GH#45324 make sure we aren't giving a spurious FutureWarning
result[:] = 0
assert (ser.iloc[1:3] == 0).all()

# list of integers
result = ser.iloc[[0, 2, 3, 4, 5]]
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ def test_setitem_ndarray_1d_2(self):

msg = "Must have equal len keys and value when setting with an iterable"
with pytest.raises(ValueError, match=msg):
df[2:5] = np.arange(1, 4) * 1j
with tm.assert_produces_warning(FutureWarning, match="label-based"):
df[2:5] = np.arange(1, 4) * 1j

def test_getitem_ndarray_3d(
self, index, frame_or_series, indexer_sli, using_array_manager
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/series/indexing/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ def test_get_with_ea(arr):
expected = ser.iloc[[2, 3]]
tm.assert_series_equal(result, expected)

result = ser.get(slice(2))
with tm.assert_produces_warning(FutureWarning, match="label-based"):
result = ser.get(slice(2))
expected = ser.iloc[[0, 1]]
tm.assert_series_equal(result, expected)

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/series/indexing/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,8 @@ def test_getitem_slice_bug(self):
def test_getitem_slice_integers(self):
ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])

result = ser[:4]
with tm.assert_produces_warning(FutureWarning, match="label-based"):
result = ser[:4]
expected = Series(ser.values[:4], index=[2, 4, 6, 8])
tm.assert_series_equal(result, expected)

Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,15 @@ def test_setitem_slice(self):
def test_setitem_slice_integers(self):
ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])

ser[:4] = 0
assert (ser[:4] == 0).all()
assert not (ser[4:] == 0).any()
msg = r"In a future version, this will be treated as \*label-based\* indexing"
with tm.assert_produces_warning(FutureWarning, match=msg):
ser[:4] = 0
with tm.assert_produces_warning(
FutureWarning, match=msg, check_stacklevel=False
):
assert (ser[:4] == 0).all()
with tm.assert_produces_warning(FutureWarning, match=msg):
assert not (ser[4:] == 0).any()

def test_setitem_slicestep(self):
# caught this bug when writing tests
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/methods/test_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,5 @@ def test_item(self):

# Case where ser[0] would not work
ser = Series(dti, index=[5, 6])
val = ser[:1].item()
val = ser.iloc[:1].item()
assert val == dti[0]
3 changes: 2 additions & 1 deletion pandas/tests/window/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,8 @@ def test_center(raw):
expected = (
concat([obj, Series([np.NaN] * 9)])
.rolling(20, min_periods=15)
.apply(f, raw=raw)[9:]
.apply(f, raw=raw)
.iloc[9:]
.reset_index(drop=True)
)
tm.assert_series_equal(result, expected)
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/window/test_ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,11 +552,11 @@ def test_ew_min_periods(min_periods, name):

@pytest.mark.parametrize("name", ["cov", "corr"])
def test_ewm_corr_cov(name):
A = Series(np.random.randn(50), index=np.arange(50))
A = Series(np.random.randn(50), index=range(50))
B = A[2:] + np.random.randn(48)

A[:10] = np.NaN
B[-10:] = np.NaN
B.iloc[-10:] = np.NaN

result = getattr(A.ewm(com=20, min_periods=5), name)(B)
assert np.isnan(result.values[:14]).all()
Expand All @@ -567,11 +567,11 @@ def test_ewm_corr_cov(name):
@pytest.mark.parametrize("name", ["cov", "corr"])
def test_ewm_corr_cov_min_periods(name, min_periods):
# GH 7898
A = Series(np.random.randn(50), index=np.arange(50))
A = Series(np.random.randn(50), index=range(50))
B = A[2:] + np.random.randn(48)

A[:10] = np.NaN
B[-10:] = np.NaN
B.iloc[-10:] = np.NaN

result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B)
# binary functions (ewmcov, ewmcorr) with bias=False require at
Expand All @@ -593,7 +593,7 @@ def test_ewm_corr_cov_min_periods(name, min_periods):

@pytest.mark.parametrize("name", ["cov", "corr"])
def test_different_input_array_raise_exception(name):
A = Series(np.random.randn(50), index=np.arange(50))
A = Series(np.random.randn(50), index=range(50))
A[:10] = np.NaN

msg = "other must be a DataFrame or Series"
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1391,7 +1391,7 @@ def test_rolling_corr_timedelta_index(index, window):
# GH: 31286
x = Series([1, 2, 3, 4, 5], index=index)
y = x.copy()
x[0:2] = 0.0
x.iloc[0:2] = 0.0
result = x.rolling(window).corr(y)
expected = Series([np.nan, np.nan, 1, 1, 1], index=index)
tm.assert_almost_equal(result, expected)
Expand Down
10 changes: 7 additions & 3 deletions pandas/tests/window/test_rolling_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,9 +247,13 @@ def test_center(roll_func, kwargs, minp):
result = getattr(obj.rolling(20, min_periods=minp, center=True), roll_func)(
**kwargs
)
expected = getattr(
concat([obj, Series([np.NaN] * 9)]).rolling(20, min_periods=minp), roll_func
)(**kwargs)[9:].reset_index(drop=True)
expected = (
getattr(
concat([obj, Series([np.NaN] * 9)]).rolling(20, min_periods=minp), roll_func
)(**kwargs)
.iloc[9:]
.reset_index(drop=True)
)
tm.assert_series_equal(result, expected)


Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/window/test_rolling_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ def test_center(q):
expected = (
concat([obj, Series([np.NaN] * 9)])
.rolling(20)
.quantile(q)[9:]
.quantile(q)
.iloc[9:]
.reset_index(drop=True)
)
tm.assert_series_equal(result, expected)
Expand Down
Loading