Skip to content

Commit 13858f6

Browse files
jbrockmendel authored and TomAugspurger committed
BUG: validate Index data is 1D + deprecate multi-dim indexing (#30588)
* BUG: validate Index data is 1D
1 parent 2bdb355 commit 13858f6

File tree

16 files changed

+121
-58
lines changed

16 files changed

+121
-58
lines changed

doc/source/whatsnew/v1.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,7 @@ Deprecations
706706
- The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`).
707707
- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`)
708708
- The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
709+
- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on an :class:`Index` is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead (:issue:`30588`)
709710

710711
**Selecting Columns from a Grouped DataFrame**
711712

@@ -1168,6 +1169,7 @@ Other
11681169
- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`)
11691170
- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`)
11701171
- Handle nested NumPy ``object`` arrays in :func:`testing.assert_series_equal` for ExtensionArray implementations (:issue:`30841`)
1172+
- Bug in :class:`Index` constructor incorrectly allowing 2-dimensional input arrays (:issue:`13601`, :issue:`27125`)
11711173

11721174
.. ---------------------------------------------------------------------------
11731175

pandas/core/arrays/categorical.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2007,9 +2007,10 @@ def __getitem__(self, key):
20072007
if com.is_bool_indexer(key):
20082008
key = check_bool_array_indexer(self, key)
20092009

2010-
return self._constructor(
2011-
values=self._codes[key], dtype=self.dtype, fastpath=True
2012-
)
2010+
result = self._codes[key]
2011+
if result.ndim > 1:
2012+
return result
2013+
return self._constructor(result, dtype=self.dtype, fastpath=True)
20132014

20142015
def __setitem__(self, key, value):
20152016
"""

pandas/core/arrays/datetimelike.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -543,8 +543,6 @@ def __getitem__(self, key):
543543
if result.ndim > 1:
544544
# To support MPL which performs slicing with 2 dim
545545
# even though it only has 1 dim by definition
546-
if is_period:
547-
return self._simple_new(result, dtype=self.dtype, freq=freq)
548546
return result
549547

550548
return self._simple_new(result, dtype=self.dtype, freq=freq)

pandas/core/arrays/interval.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,8 +500,11 @@ def __getitem__(self, value):
500500

501501
# scalar
502502
if not isinstance(left, ABCIndexClass):
503-
if isna(left):
503+
if is_scalar(left) and isna(left):
504504
return self._fill_value
505+
if np.ndim(left) > 1:
506+
# GH#30588 multi-dimensional indexer disallowed
507+
raise ValueError("multi-dimensional indexing not allowed")
505508
return Interval(left, right, self.closed)
506509

507510
return self._shallow_copy(left, right)

pandas/core/indexes/base.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,9 @@ def __new__(
393393

394394
if kwargs:
395395
raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
396+
if subarr.ndim > 1:
397+
# GH#13601, GH#20285, GH#27125
398+
raise ValueError("Index data must be 1-dimensional")
396399
return cls._simple_new(subarr, name, **kwargs)
397400

398401
elif hasattr(data, "__array__"):
@@ -608,7 +611,7 @@ def __array_wrap__(self, result, context=None):
608611
Gets called after a ufunc.
609612
"""
610613
result = lib.item_from_zerodim(result)
611-
if is_bool_dtype(result) or lib.is_scalar(result):
614+
if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
612615
return result
613616

614617
attrs = self._get_attributes_dict()
@@ -687,11 +690,10 @@ def astype(self, dtype, copy=True):
687690
return Index(np.asarray(self), dtype=dtype, copy=copy)
688691

689692
try:
690-
return Index(
691-
self.values.astype(dtype, copy=copy), name=self.name, dtype=dtype
692-
)
693+
casted = self.values.astype(dtype, copy=copy)
693694
except (TypeError, ValueError):
694695
raise TypeError(f"Cannot cast {type(self).__name__} to dtype {dtype}")
696+
return Index(casted, name=self.name, dtype=dtype)
695697

696698
_index_shared_docs[
697699
"take"
@@ -3902,6 +3904,9 @@ def __getitem__(self, key):
39023904
key = com.values_from_object(key)
39033905
result = getitem(key)
39043906
if not is_scalar(result):
3907+
if np.ndim(result) > 1:
3908+
deprecate_ndim_indexing(result)
3909+
return result
39053910
return promote(result)
39063911
else:
39073912
return result
@@ -5533,3 +5538,17 @@ def _try_convert_to_int_array(
55335538
pass
55345539

55355540
raise ValueError
5541+
5542+
5543+
def deprecate_ndim_indexing(result):
5544+
if np.ndim(result) > 1:
5545+
# GH#27125 indexer like idx[:, None] expands dim, but we
5546+
# cannot do that and keep an index, so return ndarray
5547+
# Deprecation GH#30588
5548+
warnings.warn(
5549+
"Support for multi-dimensional indexing (e.g. `index[:, None]`) "
5550+
"on an Index is deprecated and will be removed in a future "
5551+
"version. Convert to a numpy array before indexing instead.",
5552+
DeprecationWarning,
5553+
stacklevel=3,
5554+
)

pandas/core/indexes/extension.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from pandas.core.dtypes.generic import ABCSeries
1313

1414
from pandas.core.arrays import ExtensionArray
15-
from pandas.core.indexes.base import Index
15+
from pandas.core.indexes.base import Index, deprecate_ndim_indexing
1616
from pandas.core.ops import get_op_result_name
1717

1818

@@ -178,6 +178,7 @@ def __getitem__(self, key):
178178
return type(self)(result, name=self.name)
179179

180180
# Includes cases where we get a 2D ndarray back for MPL compat
181+
deprecate_ndim_indexing(result)
181182
return result
182183

183184
def __iter__(self):

pandas/core/indexes/numeric.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None):
7373
else:
7474
subarr = data
7575

76+
if subarr.ndim > 1:
77+
# GH#13601, GH#20285, GH#27125
78+
raise ValueError("Index data must be 1-dimensional")
79+
7680
name = maybe_extract_name(name, data, cls)
7781
return cls._simple_new(subarr, name=name)
7882

pandas/tests/indexes/categorical/test_category.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,3 +975,9 @@ def test_engine_type(self, dtype, engine_type):
975975
ci.values._codes = ci.values._codes.astype("int64")
976976
assert np.issubdtype(ci.codes.dtype, dtype)
977977
assert isinstance(ci._engine, engine_type)
978+
979+
def test_getitem_2d_deprecated(self):
980+
# GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
981+
idx = self.create_index()
982+
with pytest.raises(ValueError, match="cannot mask with array containing NA"):
983+
idx[:, None]

pandas/tests/indexes/common.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -875,3 +875,11 @@ def test_engine_reference_cycle(self):
875875
nrefs_pre = len(gc.get_referrers(index))
876876
index._engine
877877
assert len(gc.get_referrers(index)) == nrefs_pre
878+
879+
def test_getitem_2d_deprecated(self):
880+
# GH#30588
881+
idx = self.create_index()
882+
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
883+
res = idx[:, None]
884+
885+
assert isinstance(res, np.ndarray), type(res)

pandas/tests/indexes/datetimes/test_indexing.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,9 @@ def test_dti_business_getitem(self):
8686

8787
def test_dti_business_getitem_matplotlib_hackaround(self):
8888
rng = pd.bdate_range(START, END)
89-
values = rng[:, None]
89+
with tm.assert_produces_warning(DeprecationWarning):
90+
# GH#30588 multi-dimensional indexing deprecated
91+
values = rng[:, None]
9092
expected = rng.values[:, None]
9193
tm.assert_numpy_array_equal(values, expected)
9294

@@ -110,7 +112,9 @@ def test_dti_custom_getitem(self):
110112

111113
def test_dti_custom_getitem_matplotlib_hackaround(self):
112114
rng = pd.bdate_range(START, END, freq="C")
113-
values = rng[:, None]
115+
with tm.assert_produces_warning(DeprecationWarning):
116+
# GH#30588 multi-dimensional indexing deprecated
117+
values = rng[:, None]
114118
expected = rng.values[:, None]
115119
tm.assert_numpy_array_equal(values, expected)
116120

pandas/tests/indexes/interval/test_base.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,10 @@ def test_where(self, closed, klass):
7979
expected = IntervalIndex([np.nan] + idx[1:].tolist())
8080
result = idx.where(klass(cond))
8181
tm.assert_index_equal(result, expected)
82+
83+
def test_getitem_2d_deprecated(self):
84+
# GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
85+
idx = self.create_index()
86+
with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
87+
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
88+
idx[:, None]

pandas/tests/indexes/test_base.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@ def test_can_hold_identifiers(self):
7171

7272
@pytest.mark.parametrize("index", ["datetime"], indirect=True)
7373
def test_new_axis(self, index):
74-
new_index = index[None, :]
74+
with tm.assert_produces_warning(DeprecationWarning):
75+
# GH#30588 multi-dimensional indexing deprecated
76+
new_index = index[None, :]
7577
assert new_index.ndim == 2
7678
assert isinstance(new_index, np.ndarray)
7779

@@ -2784,9 +2786,35 @@ def test_shape_of_invalid_index():
27842786
# about this). However, as long as this is not solved in general, this test ensures
27852787
# that the returned shape is consistent with this underlying array for
27862788
# compat with matplotlib (see https://github.com/pandas-dev/pandas/issues/27775)
2787-
a = np.arange(8).reshape(2, 2, 2)
2788-
idx = pd.Index(a)
2789-
assert idx.shape == a.shape
2790-
27912789
idx = pd.Index([0, 1, 2, 3])
2792-
assert idx[:, None].shape == (4, 1)
2790+
with tm.assert_produces_warning(DeprecationWarning):
2791+
# GH#30588 multi-dimensional indexing deprecated
2792+
assert idx[:, None].shape == (4, 1)
2793+
2794+
2795+
def test_validate_1d_input():
2796+
# GH#27125 check that we do not have >1-dimensional input
2797+
msg = "Index data must be 1-dimensional"
2798+
2799+
arr = np.arange(8).reshape(2, 2, 2)
2800+
with pytest.raises(ValueError, match=msg):
2801+
pd.Index(arr)
2802+
2803+
with pytest.raises(ValueError, match=msg):
2804+
pd.Float64Index(arr.astype(np.float64))
2805+
2806+
with pytest.raises(ValueError, match=msg):
2807+
pd.Int64Index(arr.astype(np.int64))
2808+
2809+
with pytest.raises(ValueError, match=msg):
2810+
pd.UInt64Index(arr.astype(np.uint64))
2811+
2812+
df = pd.DataFrame(arr.reshape(4, 2))
2813+
with pytest.raises(ValueError, match=msg):
2814+
pd.Index(df)
2815+
2816+
# GH#13601 trying to assign a multi-dimensional array to an index is not
2817+
# allowed
2818+
ser = pd.Series(0, range(4))
2819+
with pytest.raises(ValueError, match=msg):
2820+
ser.index = np.array([[2, 3]] * 4)

pandas/tests/indexing/test_indexing.py

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -83,12 +83,9 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id):
8383
msg = (
8484
r"Buffer has wrong number of dimensions \(expected 1,"
8585
r" got 3\)|"
86-
"The truth value of an array with more than one element is "
87-
"ambiguous|"
8886
"Cannot index with multidimensional key|"
8987
r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]|"
90-
"No matching signature found|" # TypeError
91-
"unhashable type: 'numpy.ndarray'" # TypeError
88+
"Index data must be 1-dimensional"
9289
)
9390

9491
if (
@@ -104,21 +101,12 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id):
104101
"categorical",
105102
]
106103
):
107-
idxr[nd3]
108-
else:
109-
if (
110-
isinstance(obj, DataFrame)
111-
and idxr_id == "getitem"
112-
and index.inferred_type == "boolean"
113-
):
114-
error = TypeError
115-
elif idxr_id == "getitem" and index.inferred_type == "interval":
116-
error = TypeError
117-
else:
118-
error = ValueError
119-
120-
with pytest.raises(error, match=msg):
104+
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
121105
idxr[nd3]
106+
else:
107+
with pytest.raises(ValueError, match=msg):
108+
with tm.assert_produces_warning(DeprecationWarning):
109+
idxr[nd3]
122110

123111
@pytest.mark.parametrize(
124112
"index", tm.all_index_generator(5), ids=lambda x: type(x).__name__
@@ -146,16 +134,14 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id):
146134
nd3 = np.random.randint(5, size=(2, 2, 2))
147135

148136
msg = (
149-
r"Buffer has wrong number of dimensions \(expected 1, "
150-
r"got 3\)|"
151-
"The truth value of an array with more than one element is "
152-
"ambiguous|"
153-
"Only 1-dimensional input arrays are supported|"
137+
r"Buffer has wrong number of dimensions \(expected 1,"
138+
r" got 3\)|"
154139
"'pandas._libs.interval.IntervalTree' object has no attribute "
155140
"'set_value'|" # AttributeError
156141
"unhashable type: 'numpy.ndarray'|" # TypeError
157142
"No matching signature found|" # TypeError
158-
r"^\[\[\[" # pandas.core.indexing.IndexingError
143+
r"^\[\[\[|" # pandas.core.indexing.IndexingError
144+
"Index data must be 1-dimensional"
159145
)
160146

161147
if (idxr_id == "iloc") or (
@@ -176,10 +162,8 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id):
176162
):
177163
idxr[nd3] = 0
178164
else:
179-
with pytest.raises(
180-
(ValueError, AttributeError, TypeError, pd.core.indexing.IndexingError),
181-
match=msg,
182-
):
165+
err = (ValueError, AttributeError)
166+
with pytest.raises(err, match=msg):
183167
idxr[nd3] = 0
184168

185169
def test_inf_upcast(self):

pandas/tests/io/test_feather.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def test_write_with_index(self):
136136

137137
# column multi-index
138138
df.index = [0, 1, 2]
139-
df.columns = (pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]),)
139+
df.columns = pd.MultiIndex.from_tuples([("a", 1)])
140140
self.check_error_on_write(df, ValueError)
141141

142142
def test_path_pathlib(self):

pandas/tests/plotting/test_converter.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,10 @@ def test_registering_no_warning(self):
6666

6767
# Set to the "warn" state, in case this isn't the first test run
6868
register_matplotlib_converters()
69-
with tm.assert_produces_warning(None) as w:
69+
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
70+
# GH#30588 DeprecationWarning from 2D indexing
7071
ax.plot(s.index, s.values)
7172

72-
assert len(w) == 0
73-
7473
def test_pandas_plots_register(self):
7574
pytest.importorskip("matplotlib.pyplot")
7675
s = Series(range(12), index=date_range("2017", periods=12))
@@ -101,19 +100,16 @@ def test_option_no_warning(self):
101100

102101
# Test without registering first, no warning
103102
with ctx:
104-
with tm.assert_produces_warning(None) as w:
103+
# GH#30588 DeprecationWarning from 2D indexing on Index
104+
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
105105
ax.plot(s.index, s.values)
106106

107-
assert len(w) == 0
108-
109107
# Now test with registering
110108
register_matplotlib_converters()
111109
with ctx:
112-
with tm.assert_produces_warning(None) as w:
110+
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
113111
ax.plot(s.index, s.values)
114112

115-
assert len(w) == 0
116-
117113
def test_registry_resets(self):
118114
units = pytest.importorskip("matplotlib.units")
119115
dates = pytest.importorskip("matplotlib.dates")

pandas/tests/series/test_timeseries.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,9 @@ def test_first_last_valid(self, datetime_series):
137137
assert ts.last_valid_index().freq == ts.index.freq
138138

139139
def test_mpl_compat_hack(self, datetime_series):
140-
result = datetime_series[:, np.newaxis]
140+
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
141+
# GH#30588 multi-dimensional indexing deprecated
142+
result = datetime_series[:, np.newaxis]
141143
expected = datetime_series.values[:, np.newaxis]
142144
tm.assert_almost_equal(result, expected)
143145

0 commit comments

Comments
 (0)