Skip to content

Commit 6fc9852

Browse files
yehoshuadimarsky authored and jreback committed
DEPR: DataFrame GroupBy indexing with single items DeprecationWarning (#30546)
1 parent 6c1597e commit 6fc9852

File tree

6 files changed

+79
-10
lines changed

6 files changed

+79
-10
lines changed

doc/source/getting_started/comparison/comparison_with_sas.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,7 @@ for more details and examples.
629629

630630
.. ipython:: python
631631
632-
tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum()
632+
tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
633633
tips_summed.head()
634634
635635

doc/source/getting_started/comparison/comparison_with_stata.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -617,7 +617,7 @@ for more details and examples.
617617

618618
.. ipython:: python
619619
620-
tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum()
620+
tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
621621
tips_summed.head()
622622
623623

doc/source/whatsnew/v1.0.0.rst

+31
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,37 @@ Deprecations
577577
- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)
578578
- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
579579

580+
**Selecting Columns from a Grouped DataFrame**
581+
582+
When selecting columns from a :class:`DataFrameGroupBy` object, passing multiple individual keys (or a tuple of multiple keys) inside single brackets is deprecated;
583+
a list of items should be used instead. (:issue:`23566`) For example:
584+
585+
.. code-block:: ipython
586+
587+
df = pd.DataFrame({
588+
"A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
589+
"B": np.random.randn(8),
590+
"C": np.random.randn(8),
591+
})
592+
g = df.groupby('A')
593+
594+
# single key, returns SeriesGroupBy
595+
g['B']
596+
597+
# tuple of single key, returns SeriesGroupBy
598+
g[('B',)]
599+
600+
# tuple of multiple keys, returns DataFrameGroupBy, emits a FutureWarning
601+
g[('B', 'C')]
602+
603+
# multiple keys passed directly, returns DataFrameGroupBy, emits a FutureWarning
604+
# (implicitly converts the passed strings into a single tuple)
605+
g['B', 'C']
606+
607+
# proper way, returns DataFrameGroupBy
608+
g[['B', 'C']]
609+
610+
580611
.. _whatsnew_1000.prior_deprecations:
581612

582613

pandas/core/groupby/generic.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
Union,
2626
cast,
2727
)
28+
import warnings
2829

2930
import numpy as np
3031

@@ -326,7 +327,7 @@ def _aggregate_multiple_funcs(self, arg):
326327
return DataFrame(results, columns=columns)
327328

328329
def _wrap_series_output(
329-
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index,
330+
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index
330331
) -> Union[Series, DataFrame]:
331332
"""
332333
Wraps the output of a SeriesGroupBy operation into the expected result.
@@ -1578,6 +1579,19 @@ def filter(self, func, dropna=True, *args, **kwargs):
15781579

15791580
return self._apply_filter(indices, dropna)
15801581

1582+
def __getitem__(self, key):
1583+
# per GH 23566
1584+
if isinstance(key, tuple) and len(key) > 1:
1585+
# if len == 1, then it becomes a SeriesGroupBy and this is actually
1586+
# valid syntax, so don't raise warning
1587+
warnings.warn(
1588+
"Indexing with multiple keys (implicitly converted to a tuple "
1589+
"of keys) will be deprecated, use a list instead.",
1590+
FutureWarning,
1591+
stacklevel=2,
1592+
)
1593+
return super().__getitem__(key)
1594+
15811595
def _gotitem(self, key, ndim: int, subset=None):
15821596
"""
15831597
sub-classes to define

pandas/tests/groupby/test_grouping.py

+30-6
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,12 @@ def test_getitem_list_of_columns(self):
7171
)
7272

7373
result = df.groupby("A")[["C", "D"]].mean()
74-
result2 = df.groupby("A")["C", "D"].mean()
75-
result3 = df.groupby("A")[df.columns[2:4]].mean()
74+
result2 = df.groupby("A")[df.columns[2:4]].mean()
7675

7776
expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean()
7877

7978
tm.assert_frame_equal(result, expected)
8079
tm.assert_frame_equal(result2, expected)
81-
tm.assert_frame_equal(result3, expected)
8280

8381
def test_getitem_numeric_column_names(self):
8482
# GH #13731
@@ -91,14 +89,40 @@ def test_getitem_numeric_column_names(self):
9189
}
9290
)
9391
result = df.groupby(0)[df.columns[1:3]].mean()
94-
result2 = df.groupby(0)[2, 4].mean()
95-
result3 = df.groupby(0)[[2, 4]].mean()
92+
result2 = df.groupby(0)[[2, 4]].mean()
9693

9794
expected = df.loc[:, [0, 2, 4]].groupby(0).mean()
9895

9996
tm.assert_frame_equal(result, expected)
10097
tm.assert_frame_equal(result2, expected)
101-
tm.assert_frame_equal(result3, expected)
98+
99+
# per GH 23566 this should raise a FutureWarning
100+
with tm.assert_produces_warning(FutureWarning):
101+
df.groupby(0)[2, 4].mean()
102+
103+
def test_getitem_single_list_of_columns(self, df):
104+
# per GH 23566 this should raise a FutureWarning
105+
with tm.assert_produces_warning(FutureWarning):
106+
df.groupby("A")["C", "D"].mean()
107+
108+
def test_getitem_single_column(self):
109+
df = DataFrame(
110+
{
111+
"A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
112+
"B": ["one", "one", "two", "three", "two", "two", "one", "three"],
113+
"C": np.random.randn(8),
114+
"D": np.random.randn(8),
115+
"E": np.random.randn(8),
116+
}
117+
)
118+
119+
result = df.groupby("A")["C"].mean()
120+
121+
as_frame = df.loc[:, ["A", "C"]].groupby("A").mean()
122+
as_series = as_frame.iloc[:, 0]
123+
expected = as_series
124+
125+
tm.assert_series_equal(result, expected)
102126

103127

104128
# grouping

pandas/tests/groupby/test_transform.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ def test_dispatch_transform(tsframe):
319319

320320
def test_transform_select_columns(df):
321321
f = lambda x: x.mean()
322-
result = df.groupby("A")["C", "D"].transform(f)
322+
result = df.groupby("A")[["C", "D"]].transform(f)
323323

324324
selection = df[["C", "D"]]
325325
expected = selection.groupby(df["A"]).transform(f)

0 commit comments

Comments
 (0)