Skip to content

DEPR: Deprecate set and dict as indexers #45052

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ Other Deprecations
- Deprecated parameter ``names`` in :meth:`Index.copy` (:issue:`44916`)
- A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating that the argument signature may change to more closely emulate the arguments of :meth:`.Styler.to_latex` in future versions (:issue:`44411`)
- Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
- Deprecated passing ``set`` or ``dict`` as indexer for :meth:`DataFrame.loc.__setitem__`, :meth:`DataFrame.loc.__getitem__`, :meth:`Series.loc.__setitem__`, :meth:`Series.loc.__getitem__`, :meth:`DataFrame.__getitem__`, :meth:`Series.__getitem__` and :meth:`Series.__setitem__` (:issue:`42825`)
- Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`)
- Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`)
-
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@
)
from pandas.core.indexing import (
check_bool_indexer,
check_deprecated_indexers,
convert_to_index_sliceable,
)
from pandas.core.internals import (
Expand Down Expand Up @@ -3457,6 +3458,7 @@ def _iter_column_arrays(self) -> Iterator[ArrayLike]:
yield self._get_column_array(i)

def __getitem__(self, key):
check_deprecated_indexers(key)
key = lib.item_from_zerodim(key)
key = com.apply_if_callable(key, self)

Expand Down
35 changes: 35 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,10 @@ def _get_setitem_indexer(self, key):
if self.name == "loc":
self._ensure_listlike_indexer(key)

if isinstance(key, tuple):
for x in key:
check_deprecated_indexers(x)

if self.axis is not None:
return self._convert_tuple(key)

Expand Down Expand Up @@ -698,6 +702,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None):
)

def __setitem__(self, key, value):
check_deprecated_indexers(key)
if isinstance(key, tuple):
key = tuple(list(x) if is_iterator(x) else x for x in key)
key = tuple(com.apply_if_callable(x, self.obj) for x in key)
Expand Down Expand Up @@ -890,6 +895,9 @@ def _getitem_nested_tuple(self, tup: tuple):
# we have a nested tuple so have at least 1 multi-index level
# we should be able to match up the dimensionality here

for key in tup:
check_deprecated_indexers(key)

# we have too many indexers for our dim, but have at least 1
# multi-index dimension, try to see if we have something like
# a tuple passed to a series with a multi-index
Expand Down Expand Up @@ -943,6 +951,7 @@ def _convert_to_indexer(self, key, axis: int):
raise AbstractMethodError(self)

def __getitem__(self, key):
check_deprecated_indexers(key)
if type(key) is tuple:
key = tuple(list(x) if is_iterator(x) else x for x in key)
key = tuple(com.apply_if_callable(x, self.obj) for x in key)
Expand Down Expand Up @@ -2444,3 +2453,29 @@ def need_slice(obj: slice) -> bool:
or obj.stop is not None
or (obj.step is not None and obj.step != 1)
)


def check_deprecated_indexers(key) -> None:
    """
    Emit a ``FutureWarning`` when ``key`` is, or directly contains, a set or dict.

    A tuple key is inspected one level deep: each of its elements is tested,
    but tuples nested inside it are not unpacked further. Any other key is
    tested itself. The set check runs before the dict check, so a tuple
    holding both kinds produces two warnings, set first.

    Parameters
    ----------
    key : object
        The indexer to inspect.
    """
    # Treat a non-tuple key as a one-element group so a single membership
    # test covers both the "key itself" and the "element of a tuple" cases.
    members = key if isinstance(key, tuple) else (key,)

    deprecated_kinds = (
        (
            set,
            "Passing a set as an indexer is deprecated and will raise in "
            "a future version. Use a list instead.",
        ),
        (
            dict,
            "Passing a dict as an indexer is deprecated and will raise in "
            "a future version. Use a list instead.",
        ),
    )

    for kind, message in deprecated_kinds:
        if not any(isinstance(member, kind) for member in members):
            continue
        warnings.warn(
            message,
            FutureWarning,
            stacklevel=find_stack_level(),
        )
7 changes: 6 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,10 @@
ensure_index,
)
import pandas.core.indexes.base as ibase
from pandas.core.indexing import check_bool_indexer
from pandas.core.indexing import (
check_bool_indexer,
check_deprecated_indexers,
)
from pandas.core.internals import (
SingleArrayManager,
SingleBlockManager,
Expand Down Expand Up @@ -939,6 +942,7 @@ def _slice(self, slobj: slice, axis: int = 0) -> Series:
return self._get_values(slobj)

def __getitem__(self, key):
check_deprecated_indexers(key)
key = com.apply_if_callable(key, self)

if key is Ellipsis:
Expand Down Expand Up @@ -1065,6 +1069,7 @@ def _get_value(self, label, takeable: bool = False):
return self.index._get_values_for_loc(self, loc, label)

def __setitem__(self, key, value) -> None:
check_deprecated_indexers(key)
key = com.apply_if_callable(key, self)
cacher_needs_updating = self._check_is_chained_assignment_possible()

Expand Down
20 changes: 18 additions & 2 deletions pandas/tests/frame/indexing/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,11 @@ def test_getitem_listlike(self, idx_type, levels, float_frame):
idx = idx_type(keys)
idx_check = list(idx_type(keys))

result = frame[idx]
if isinstance(idx, (set, dict)):
with tm.assert_produces_warning(FutureWarning):
result = frame[idx]
else:
result = frame[idx]

expected = frame.loc[:, idx_check]
expected.columns.names = frame.columns.names
Expand All @@ -143,7 +147,8 @@ def test_getitem_listlike(self, idx_type, levels, float_frame):

idx = idx_type(keys + [missing])
with pytest.raises(KeyError, match="not in index"):
frame[idx]
with tm.assert_produces_warning(FutureWarning):
frame[idx]

def test_getitem_iloc_generator(self):
# GH#39614
Expand Down Expand Up @@ -388,3 +393,14 @@ def test_getitem_datetime_slice(self):
),
)
tm.assert_frame_equal(result, expected)


class TestGetitemDeprecatedIndexers:
    """``DataFrame.__getitem__`` warns when the key is a set or a dict."""

    @pytest.mark.parametrize("key", [{"a", "b"}, {"a": "a"}])
    def test_getitem_dict_and_set_deprecated(self, key):
        # GH#42825
        columns = MultiIndex.from_tuples([("a", 1), ("b", 2)])
        frame = DataFrame([[1, 2], [3, 4]], columns=columns)

        with tm.assert_produces_warning(FutureWarning):
            frame[key]
62 changes: 62 additions & 0 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1526,3 +1526,65 @@ def test_loc_iloc_setitem_non_categorical_rhs(
# "c" not part of the categories
with pytest.raises(TypeError, match=msg1):
indexer(df)[key] = ["c", "c"]


class TestDepreactedIndexers:
    """``DataFrame.loc`` get/set warn when a set or dict is used as an indexer."""

    @pytest.mark.parametrize(
        "key",
        [
            {1},
            {1: 1},
            ({1}, "a"),
            ({1: 1}, "a"),
            (1, {"a"}),
            (1, {"a": "a"}),
        ],
    )
    def test_getitem_dict_and_set_deprecated(self, key):
        # GH#42825
        frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])

        with tm.assert_produces_warning(FutureWarning):
            frame.loc[key]

    @pytest.mark.parametrize(
        "key",
        [
            {1},
            {1: 1},
            (({1}, 2), "a"),
            (({1: 1}, 2), "a"),
            ((1, 2), {"a"}),
            ((1, 2), {"a": "a"}),
        ],
    )
    def test_getitem_dict_and_set_deprecated_multiindex(self, key):
        # GH#42825
        row_index = MultiIndex.from_tuples([(1, 2), (3, 4)])
        frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], index=row_index)

        with tm.assert_produces_warning(FutureWarning):
            frame.loc[key]

    @pytest.mark.parametrize(
        "key",
        [
            {1},
            {1: 1},
            ({1}, "a"),
            ({1: 1}, "a"),
            (1, {"a"}),
            (1, {"a": "a"}),
        ],
    )
    def test_setitem_dict_and_set_deprecated(self, key):
        # GH#42825
        frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])

        with tm.assert_produces_warning(FutureWarning):
            frame.loc[key] = 1

    @pytest.mark.parametrize(
        "key",
        [
            {1},
            {1: 1},
            (({1}, 2), "a"),
            (({1: 1}, 2), "a"),
            ((1, 2), {"a"}),
            ((1, 2), {"a": "a"}),
        ],
    )
    def test_setitem_dict_and_set_deprecated_multiindex(self, key):
        # GH#42825
        row_index = MultiIndex.from_tuples([(1, 2), (3, 4)])
        frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], index=row_index)

        with tm.assert_produces_warning(FutureWarning):
            frame.loc[key] = 1
7 changes: 5 additions & 2 deletions pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,11 @@ def convert_nested_indexer(indexer_type, keys):
convert_nested_indexer(indexer_type, k)
for indexer_type, k in zip(types, keys)
)

result = df.loc[indexer, "Data"]
if indexer_type_1 is set or indexer_type_2 is set:
with tm.assert_produces_warning(FutureWarning):
result = df.loc[indexer, "Data"]
else:
result = df.loc[indexer, "Data"]
expected = Series(
[1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys)
)
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/series/indexing/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,3 +696,19 @@ def test_duplicated_index_getitem_positional_indexer(index_vals):
s = Series(range(5), index=list(index_vals))
result = s[3]
assert result == 3


class TestGetitemDeprecatedIndexers:
    """``Series.__getitem__`` / ``__setitem__`` warn on a set or dict key."""

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        # GH#42825
        series = Series([1, 2, 3])

        with tm.assert_produces_warning(FutureWarning):
            series[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_deprecated(self, key):
        # GH#42825
        series = Series([1, 2, 3])

        with tm.assert_produces_warning(FutureWarning):
            series[key] = 1
31 changes: 31 additions & 0 deletions pandas/tests/series/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pandas import (
DataFrame,
IndexSlice,
MultiIndex,
Series,
Timedelta,
Timestamp,
Expand Down Expand Up @@ -316,3 +317,33 @@ def test_frozenset_index():
assert s[idx1] == 2
s[idx1] = 3
assert s[idx1] == 3


class TestDepreactedIndexers:
    """``Series.loc`` get/set warn when a set or dict is used as an indexer."""

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        # GH#42825
        series = Series([1, 2])

        with tm.assert_produces_warning(FutureWarning):
            series.loc[key]

    @pytest.mark.parametrize(
        "key",
        [
            {1},
            {1: 1},
            ({1}, 2),
            ({1: 1}, 2),
        ],
    )
    def test_getitem_dict_and_set_deprecated_multiindex(self, key):
        # GH#42825
        levels = MultiIndex.from_tuples([(1, 2), (3, 4)])
        series = Series([1, 2], index=levels)

        with tm.assert_produces_warning(FutureWarning):
            series.loc[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_deprecated(self, key):
        # GH#42825
        series = Series([1, 2])

        with tm.assert_produces_warning(FutureWarning):
            series.loc[key] = 1

    @pytest.mark.parametrize(
        "key",
        [
            {1},
            {1: 1},
            ({1}, 2),
            ({1: 1}, 2),
        ],
    )
    def test_setitem_dict_and_set_deprecated_multiindex(self, key):
        # GH#42825
        levels = MultiIndex.from_tuples([(1, 2), (3, 4)])
        series = Series([1, 2], index=levels)

        with tm.assert_produces_warning(FutureWarning):
            series.loc[key] = 1