-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Fix 'observed' kwarg not doing anything on SeriesGroupBy #26463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 21 commits
a5d6d1a
41f49f4
2575c41
1c02d9f
7350472
0a949d5
0e9f473
1ef54f4
cd481ad
a515caf
ff42dd7
c22875c
cc0b725
629a144
e4fda22
8cfa4a1
db176de
d520952
3591dbc
f97c8a1
d5c9c40
ad16db8
7c525a1
e6bca5e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
from collections import OrderedDict | ||
from datetime import datetime | ||
|
||
import numpy as np | ||
|
@@ -963,3 +964,72 @@ def test_shift(fill_value): | |
categories=['a', 'b', 'c', 'd'], ordered=False) | ||
res = ct.shift(1, fill_value=fill_value) | ||
assert_equal(res, expected) | ||
|
||
|
||
@pytest.fixture | ||
def df_cat(df): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you add a docstring, xref #19159 |
||
df_cat = df.copy()[:4] # leave out some groups | ||
df_cat['A'] = df_cat['A'].astype('category') | ||
df_cat['B'] = df_cat['B'].astype('category') | ||
df_cat['C'] = pd.Series([1, 2, 3, 4]) | ||
yield df_cat | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yield is perfectly valid, but for consistency with the rest of the fixtures can you use a return. There's no context, teardown or finalization here. |
||
|
||
|
||
@pytest.mark.parametrize('operation, index', [ | ||
('agg', MultiIndex.from_frame( | ||
pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'], | ||
'B': ['one', 'two', 'one', 'three'] | ||
}, dtype='category'))), | ||
('apply', MultiIndex.from_frame( | ||
pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. if the index is the same as above with just the dtype being different, might be clearer if you parametrize over just dtype, something like... @pytest.mark.parametrize(...., 'kwargs', [(..., None), (..., dict(dtype='category'))] and then MultiIndex.from_frame(...., **kwargs) |
||
'B': ['one', 'two', 'one', 'three'] | ||
})))]) | ||
def test_seriesgroupby_observed_true(df_cat, operation, index): | ||
# GH 24880 | ||
expected = pd.Series(data=[1, 3, 2, 4], index=index, name='C') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Series and DataFrame are already imported, so you could remove the pd prefixes, here and elsewhere in the tests. |
||
grouped = df_cat.groupby(['A', 'B'], observed=True)['C'] | ||
result = getattr(grouped, operation)(sum) | ||
assert_series_equal(result, expected) | ||
|
||
|
||
@pytest.mark.parametrize('operation', ['agg', 'apply'])
@pytest.mark.parametrize('observed', [False, None])
def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
    """Compare a grouped ``sum`` on column ``C`` against the full A x B grid.

    The expected Series is indexed by the sorted product of the ``A`` and
    ``B`` categorical levels and holds NaN for some of those pairs
    (presumably the pairs missing from ``df_cat``). The same expectation is
    used for ``observed=False`` and ``observed=None``, and for both ``agg``
    and ``apply``.
    """
    # GH 24880
    levels = [CategoricalIndex(['bar', 'foo'], ordered=False),
              CategoricalIndex(['one', 'three', 'two'], ordered=False)]
    full_index = MultiIndex.from_product(levels, names=['A', 'B'])
    # sortlevel() returns (sorted index, indexer); only the index is needed.
    sorted_index = full_index.sortlevel()[0]
    expected = pd.Series(data=[2, 4, np.nan, 1, np.nan, 3],
                         index=sorted_index, name='C')

    column = df_cat.groupby(['A', 'B'], observed=observed)['C']
    method = getattr(column, operation)
    result = method(sum)
    assert_series_equal(result, expected)
|
||
|
||
def _min_max(values):
    """Map 'min' and 'max' to the smallest and largest entry of ``values``."""
    return OrderedDict([('min', values.min()), ('max', values.max())])


def _full_product_index():
    """Build the (A, B, statistic) index covering every category pairing."""
    return MultiIndex.from_product(
        [CategoricalIndex(['bar', 'foo'], ordered=False),
         CategoricalIndex(['one', 'three', 'two'], ordered=False),
         Index(['min', 'max'])],
        names=['A', 'B', None])


@pytest.mark.parametrize("observed, index, data", [
    (True,
     MultiIndex.from_tuples(
         # Each (A, B) pair is followed by its 'min' row, then its 'max' row.
         [(a, b, stat)
          for a, b in [('foo', 'one'), ('foo', 'two'),
                       ('bar', 'one'), ('bar', 'three')]
          for stat in ('min', 'max')],
         names=['A', 'B', None]),
     [1, 1, 3, 3, 2, 2, 4, 4]),
    (False,
     _full_product_index(),
     [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3]),
    (None,
     _full_product_index(),
     [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3])])
def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
    """Compare ``apply`` with a dict-returning function to a 3-level Series.

    The applied function returns an ordered 'min'/'max' mapping per group;
    the expected Series carries those keys as an unnamed third index level
    below ``A`` and ``B``.
    """
    # GH 24880
    grouped = df_cat.groupby(['A', 'B'], observed=observed)['C']
    result = grouped.apply(_min_max)
    expected = pd.Series(data=data, index=index, name='C')
    assert_series_equal(result, expected)
Uh oh!
There was an error while loading. Please reload this page.