Skip to content

Commit 55a8509

Browse files
committed
Address review comments
1 parent ab2c65d commit 55a8509

File tree

3 files changed

+19
-36
lines changed

3 files changed

+19
-36
lines changed

doc/source/whatsnew/v0.25.0.rst

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -29,46 +29,39 @@ Backwards incompatible API changes
2929
GroupBy.apply on ``DataFrame`` evaluates first group only once
3030
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3131

32-
(:issue:`2936`, :issue:`2656`, :issue:`7739`, :issue:`10519`, :issue:`12155`,
33-
:issue:`20084`, :issue:`21417`)
34-
3532
The implementation of ``DataFrame.groupby.apply`` previously evaluated the supplied function
3633
consistently twice on the first group to infer whether it is safe to use a fast
3734
code path. Particularly for functions with side effects, this was an undesired
3835
behavior and may have led to surprises.
3936

4037
Now every group is evaluated only a single time.
4138

42-
Previous behavior:
43-
44-
.. code-block:: ipython
39+
.. ipython:: python
4540
46-
In [2]: df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]})
41+
df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]})
42+
df
4743
48-
In [3]: side_effects = []
44+
side_effects = []
45+
def func(group):
46+
side_effects.append(group.name)
47+
return group
48+
df.groupby("a").apply(func)
4949
50-
In [4]: def func_fast_apply(group):
51-
...: side_effects.append(group.name)
52-
...: return len(group)
53-
...:
50+
Previous behavior:
5451

55-
In [5]: df.groupby("a").apply(func_fast_apply)
52+
.. code-block:: ipython
5653
5754
In [6]: assert side_effects == ["x", "x", "y"]
5855
5956
New behavior:
6057

6158
.. ipython:: python
6259
63-
df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]})
60+
assert side_effects == ["x", "y"]
6461
65-
side_effects = []
66-
def func(group):
67-
side_effects.append(group.name)
68-
return group
6962
70-
df.groupby("a").apply(func)
71-
assert side_effects == ["x", "y"]
63+
(:issue:`2936`, :issue:`2656`, :issue:`7739`, :issue:`10519`, :issue:`12155`,
64+
:issue:`20084`, :issue:`21417`)
7265

7366

7467
.. _whatsnew_0250.api.other:

pandas/tests/groupby/test_apply.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,20 +107,22 @@ def f(g):
107107

108108

109109
def test_group_apply_once_per_group():
110-
# GH24748 ,GH2936, GH2656, GH7739, GH10519, GH12155, GH20084, GH21417
111-
df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)})
110+
# GH2936, GH7739, GH10519, GH2656, GH12155, GH20084, GH21417
111+
df = pd.DataFrame({"a": [0, 0, 1, 1, 2, 2], "b": np.arange(6)})
112112

113113
names = []
114114

115115
def f_copy(group):
116116
names.append(group.name)
117117
return group.copy()
118+
118119
df.groupby("a").apply(f_copy)
119120
assert names == [0, 1, 2]
120121

121122
def f_nocopy(group):
122123
names.append(group.name)
123124
return group
125+
124126
names = []
125127
# this takes the slow apply path
126128
df.groupby("a").apply(f_nocopy)

pandas/tests/groupby/test_groupby.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1420,30 +1420,18 @@ def foo(x):
14201420

14211421
def test_group_name_available_in_inference_pass():
14221422
# gh-15062
1423-
# GH24748 ,GH2936, GH2656, GH7739, GH10519, GH12155, GH20084, GH21417
14241423
df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)})
14251424

14261425
names = []
14271426

1428-
def f_fast(group):
1427+
def f(group):
14291428
names.append(group.name)
14301429
return group.copy()
1430+
df.groupby('a', sort=False, group_keys=False).apply(f)
14311431

1432-
df.groupby('a', sort=False, group_keys=False).apply(f_fast)
1433-
1434-
# every group should appear once, i.e. apply is called once per group
14351432
expected_names = [0, 1, 2]
14361433
assert names == expected_names
14371434

1438-
names_slow = []
1439-
1440-
def f_slow(group):
1441-
names_slow.append(group.name)
1442-
return group
1443-
1444-
df.groupby('a', sort=False, group_keys=False).apply(f_slow)
1445-
assert names_slow == [0, 1, 2]
1446-
14471435

14481436
def test_no_dummy_key_names(df):
14491437
# see gh-1291

0 commit comments

Comments
 (0)