-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Fix 'observed' kwarg not doing anything on SeriesGroupBy #26463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 19 commits
a5d6d1a
41f49f4
2575c41
1c02d9f
7350472
0a949d5
0e9f473
1ef54f4
cd481ad
a515caf
ff42dd7
c22875c
cc0b725
629a144
e4fda22
8cfa4a1
db176de
d520952
3591dbc
f97c8a1
d5c9c40
ad16db8
7c525a1
e6bca5e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
from collections import OrderedDict | ||
from datetime import datetime | ||
|
||
import numpy as np | ||
|
@@ -963,3 +964,81 @@ def test_shift(fill_value): | |
categories=['a', 'b', 'c', 'd'], ordered=False) | ||
res = ct.shift(1, fill_value=fill_value) | ||
assert_equal(res, expected) | ||
|
||
|
||
class TestSeriesGroupByObservedKwarg: | ||
# GH 24880 | ||
|
||
@pytest.fixture(autouse=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Don't use setup_method; this creates a rather opaque path. I don't think the class adds anything here (or rather it might, but we don't use any classes in this file, so rather refactor in a later change).
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I used a class not to have to do this bit:
df = df.copy()[:4] # leave out some groups
df['A'] = df['A'].astype('category')
df['B'] = df['B'].astype('category')
df['C'] = pd.Series([1, 2, 3, 4])
in each of the 3 test functions. An alternative is to put the above in another fixture derived from the first one, but that would probably not be reused much. So do I just rename it?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Just define the fixture as a function outside of the class (and remove the class as indicated). |
||
# Prepares self.df, which the three test methods below group on. | ||
# `df` is presumably a DataFrame fixture defined elsewhere in the test | ||
# suite (TODO confirm); only a copy of it is modified here. | ||
# NOTE(review): the name matches pytest's xunit-style `setup_method` hook | ||
# while the block is also decorated as an autouse fixture; the review | ||
# thread on this diff asks for a plain module-level fixture instead. | ||
def setup_method(self, df): | ||
self.df = df.copy()[:4] # leave out some groups | ||
# Group keys A and B become categorical so that `observed` has an effect. | ||
self.df['A'] = self.df['A'].astype('category') | ||
self.df['B'] = self.df['B'].astype('category') | ||
# Column C is replaced with the values the expected results are built from. | ||
self.df['C'] = pd.Series([1, 2, 3, 4]) | ||
|
||
# With observed=True, only the four (A, B) combinations listed in the | ||
# expected index should appear in the result, for both 'agg' and 'apply'. | ||
# The two parametrized cases differ only in the expected index: the 'agg' | ||
# case builds it from category-dtype columns, the 'apply' case from the | ||
# same labels without the category dtype. | ||
@pytest.mark.parametrize('operation, index', [ | ||
('agg', MultiIndex.from_frame( | ||
pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'], | ||
'B': ['one', 'two', 'one', 'three'] | ||
}, dtype='category'))), | ||
('apply', MultiIndex.from_frame( | ||
pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'], | ||
'B': ['one', 'two', 'one', 'three'] | ||
})))]) | ||
def test_true(self, operation, index): | ||
expected = pd.Series(data=[1, 3, 2, 4], index=index, name='C') | ||
grouped = self.df.groupby(['A', 'B'], observed=True)['C'] | ||
# Look up the SeriesGroupBy method by name and call it with builtin sum. | ||
result = getattr(grouped, operation)(sum) | ||
assert_series_equal(result, expected) | ||
|
||
# With observed=False or observed=None, the expected result is indexed by | ||
# the full product of the A and B categories (2 x 3 = 6 entries); three of | ||
# them carry NaN in the expected data. Checked for both 'agg' and 'apply'. | ||
@pytest.mark.parametrize('operation', ['agg', 'apply']) | ||
@pytest.mark.parametrize('observed', [False, None]) | ||
def test_false_or_none(self, observed, operation): | ||
# sortlevel() returns a pair; only the first element (the sorted index) | ||
# is kept, the second is discarded. | ||
index, _ = MultiIndex.from_product( | ||
[CategoricalIndex(['bar', 'foo'], | ||
categories=['bar', 'foo'], | ||
ordered=False), | ||
CategoricalIndex(['one', 'three', 'two'], | ||
categories=['one', 'three', 'two'], | ||
ordered=False), | ||
], | ||
names=['A', 'B']).sortlevel() | ||
|
||
# Expected values follow the product order bar/one, bar/three, bar/two, | ||
# foo/one, foo/three, foo/two. | ||
expected = pd.Series(data=[2, 4, np.nan, 1, np.nan, 3], | ||
index=index, name='C') | ||
grouped = self.df.groupby(['A', 'B'], observed=observed)['C'] | ||
# Look up the SeriesGroupBy method by name and call it with builtin sum. | ||
result = getattr(grouped, operation)(sum) | ||
assert_series_equal(result, expected) | ||
|
||
# apply() is given a function that returns an OrderedDict with 'min' and | ||
# 'max' keys; the expected Series carries those keys as a third, unnamed | ||
# index level beneath A and B. | ||
# Cases: observed=True expects only the four listed (A, B) combinations; | ||
# observed=False and observed=None share one expectation, the full category | ||
# product with NaN for the remaining combinations. | ||
@pytest.mark.parametrize("observed, index, data", [ | ||
(True, MultiIndex.from_tuples( | ||
[('foo', 'one', 'min'), ('foo', 'one', 'max'), | ||
('foo', 'two', 'min'), ('foo', 'two', 'max'), | ||
('bar', 'one', 'min'), ('bar', 'one', 'max'), | ||
('bar', 'three', 'min'), ('bar', 'three', 'max')], | ||
names=['A', 'B', None]), [1, 1, 3, 3, 2, 2, 4, 4]), | ||
(False, MultiIndex.from_product( | ||
[CategoricalIndex(['bar', 'foo'], | ||
categories=['bar', 'foo'], | ||
ordered=False), | ||
CategoricalIndex(['one', 'three', 'two'], | ||
categories=['one', 'three', 'two'], | ||
ordered=False), | ||
Index(['min', 'max'])], | ||
names=['A', 'B', None]), | ||
[2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3]), | ||
(None, MultiIndex.from_product( | ||
[CategoricalIndex(['bar', 'foo'], | ||
categories=['bar', 'foo'], | ||
ordered=False), | ||
CategoricalIndex(['one', 'three', 'two'], | ||
categories=['one', 'three', 'two'], | ||
ordered=False), | ||
Index(['min', 'max'])], | ||
names=['A', 'B', None]), | ||
[2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3])]) | ||
def test_apply_dict(self, observed, index, data): | ||
expected = pd.Series(data=data, index=index, name='C') | ||
# Each group holds a single C value in the expected data, so min and | ||
# max are equal within every pair. | ||
result = self.df.groupby(['A', 'B'], observed=observed)['C'].apply( | ||
lambda x: OrderedDict([('min', x.min()), ('max', x.max())])) | ||
assert_series_equal(result, expected) |
Uh oh!
There was an error while loading. Please reload this page.