Skip to content

Commit 5f845db

Browse files
committed
Merge pull request #7631 from jreback/groupby_nth_fix
BUG: doc example in groupby.rst (GH7559 / GH7628)
2 parents c8a3eba + b4dea19 commit 5f845db

File tree

2 files changed

+22
-5
lines changed

2 files changed

+22
-5
lines changed

pandas/core/groupby.py

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -829,19 +829,29 @@ def nth(self, n, dropna=None):
829829
dropped = self.obj.dropna(how=dropna, axis=self.axis)
830830

831831
# get a new grouper for our dropped obj
832-
grouper, exclusions, obj = _get_grouper(dropped, key=self.keys, axis=self.axis,
833-
level=self.level, sort=self.sort)
832+
if self.keys is None and self.level is None:
834833

835-
sizes = obj.groupby(grouper).size()
836-
result = obj.groupby(grouper).nth(n)
834+
# we don't have the grouper info available (e.g. we have selected out
835+
# a column that is not in the current object)
836+
axis = self.grouper.axis
837+
grouper = axis[axis.isin(dropped.index)]
838+
keys = self.grouper.names
839+
else:
840+
841+
# create a grouper with the original parameters, but on the dropped object
842+
grouper, _, _ = _get_grouper(dropped, key=self.keys, axis=self.axis,
843+
level=self.level, sort=self.sort)
844+
845+
sizes = dropped.groupby(grouper).size()
846+
result = dropped.groupby(grouper).nth(n)
837847
mask = (sizes<max_len).values
838848

839849
# set the results which don't meet the criteria
840850
if len(result) and mask.any():
841851
result.loc[mask] = np.nan
842852

843853
# reset/reindex to the original groups
844-
if len(self.obj) == len(dropped):
854+
if len(self.obj) == len(dropped) or len(result) == len(self.grouper.result_index):
845855
result.index = self.grouper.result_index
846856
else:
847857
result = result.reindex(self.grouper.result_index)

pandas/tests/test_groupby.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -304,6 +304,13 @@ def test_nth(self):
304304
result = s.groupby(g).nth(0,dropna='all')
305305
assert_series_equal(result,expected)
306306

307+
# doc example
308+
df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
309+
g = df.groupby('A')
310+
result = g.B.nth(0, dropna=True)
311+
expected = g.B.first()
312+
assert_series_equal(result,expected)
313+
307314
def test_grouper_index_types(self):
308315
# related GH5375
309316
# groupby misbehaving when using a Floatlike index

0 commit comments

Comments
 (0)