-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Exclude nuisance columns from result of window functions #27044
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
e06d307
0d3f912
e812bf9
12a012b
035a697
392e7e6
76adb2c
eaeca8e
fd678a5
ad9b5e2
6d7602e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,7 @@ | |
ABCDataFrame, ABCDateOffset, ABCDatetimeIndex, ABCPeriodIndex, ABCSeries, | ||
ABCTimedeltaIndex) | ||
|
||
from pandas.core.base import PandasObject, SelectionMixin | ||
from pandas.core.base import DataError, PandasObject, SelectionMixin | ||
import pandas.core.common as com | ||
from pandas.core.generic import _shared_docs | ||
from pandas.core.groupby.base import GroupByMixin | ||
|
@@ -112,9 +112,9 @@ def _create_blocks(self): | |
if obj.ndim == 2: | ||
obj = obj.reindex(columns=obj.columns.difference([self.on]), | ||
copy=False) | ||
blocks = obj._to_dict_of_blocks(copy=False).values() | ||
blocks_dict = obj._to_dict_of_blocks(copy=False) | ||
|
||
return blocks, obj, index | ||
return blocks_dict, obj, index | ||
|
||
def _gotitem(self, key, ndim, subset=None): | ||
""" | ||
|
@@ -243,7 +243,7 @@ def _wrap_result(self, result, block=None, obj=None): | |
return type(obj)(result, index=index, columns=block.columns) | ||
return result | ||
|
||
def _wrap_results(self, results, blocks, obj): | ||
def _wrap_results(self, results, blocks, obj, exclude=None): | ||
""" | ||
Wrap the results. | ||
|
||
|
@@ -252,6 +252,7 @@ def _wrap_results(self, results, blocks, obj): | |
results : list of ndarrays | ||
blocks : list of blocks | ||
obj : conformed data (may be resampled) | ||
exclude: list of columns to exclude, default to None | ||
""" | ||
|
||
from pandas import Series, concat | ||
|
@@ -285,6 +286,13 @@ def _wrap_results(self, results, blocks, obj): | |
indexer = columns.get_indexer(selection.tolist() + [name]) | ||
columns = columns.take(sorted(indexer)) | ||
|
||
# exclude nuisance columns so that they are not reindexed | ||
if exclude is not None and exclude: | ||
columns = [c for c in columns if c not in exclude] | ||
|
||
if not columns: | ||
raise DataError('No numeric types to aggregate') | ||
|
||
if not len(final): | ||
return obj.astype('float64') | ||
return concat(final, axis=1).reindex(columns=columns, copy=False) | ||
|
@@ -671,14 +679,24 @@ def _apply_window(self, mean=True, **kwargs): | |
window = self._prep_window(**kwargs) | ||
center = self.center | ||
|
||
blocks, obj, index = self._create_blocks() | ||
blocks_dict, obj, index = self._create_blocks() | ||
dtypes = blocks_dict.keys() | ||
blocks = blocks_dict.values() | ||
|
||
results = [] | ||
for b in blocks: | ||
exclude = [] | ||
for dtype in list(dtypes): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. best just to
then don't need anything else. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since my solution to the unordered-dict issue requires deleting nuisance blocks (a block being the group of columns that share one dtype), I needed a shallow copy of the keys so that blocks can be removed while iterating. |
||
b = blocks_dict[dtype] | ||
try: | ||
values = self._prep_values(b.values) | ||
except TypeError: | ||
results.append(b.values.copy()) | ||
continue | ||
|
||
except (TypeError, NotImplementedError): | ||
if isinstance(obj, ABCDataFrame): | ||
exclude.extend(b.columns) | ||
del blocks_dict[dtype] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. why are you using del here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As you stated, the order of a dictionary differs between runs. So the iteration order for […] So, in the case of a DataFrame with columns […]
|
||
continue | ||
else: | ||
raise DataError('No numeric types to aggregate') | ||
|
||
if values.size == 0: | ||
results.append(values.copy()) | ||
|
@@ -700,7 +718,7 @@ def f(arg, *args, **kwargs): | |
result = self._center_window(result, window) | ||
results.append(result) | ||
|
||
return self._wrap_results(results, blocks, obj) | ||
return self._wrap_results(results, blocks, obj, exclude) | ||
|
||
_agg_see_also_doc = dedent(""" | ||
See Also | ||
|
@@ -842,11 +860,25 @@ def _apply(self, func, name=None, window=None, center=None, | |
if check_minp is None: | ||
check_minp = _use_window | ||
|
||
blocks, obj, index = self._create_blocks() | ||
blocks_dict, obj, index = self._create_blocks() | ||
dtypes = blocks_dict.keys() | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
blocks = blocks_dict.values() | ||
index, indexi = self._get_index(index=index) | ||
|
||
results = [] | ||
for b in blocks: | ||
values = self._prep_values(b.values) | ||
exclude = [] | ||
for dtype in list(dtypes): | ||
b = blocks_dict[dtype] | ||
try: | ||
values = self._prep_values(b.values) | ||
|
||
except (TypeError, NotImplementedError): | ||
if isinstance(obj, ABCDataFrame): | ||
exclude.extend(b.columns) | ||
del blocks_dict[dtype] | ||
continue | ||
else: | ||
raise DataError('No numeric types to aggregate') | ||
|
||
if values.size == 0: | ||
results.append(values.copy()) | ||
|
@@ -892,7 +924,7 @@ def calc(x): | |
|
||
results.append(result) | ||
|
||
return self._wrap_results(results, blocks, obj) | ||
return self._wrap_results(results, blocks, obj, exclude) | ||
|
||
|
||
class _Rolling_and_Expanding(_Rolling): | ||
|
@@ -937,7 +969,8 @@ class _Rolling_and_Expanding(_Rolling): | |
|
||
def count(self): | ||
|
||
blocks, obj, index = self._create_blocks() | ||
blocks_dict, obj, index = self._create_blocks() | ||
blocks = blocks_dict.values() | ||
# Validate the index | ||
self._get_index(index=index) | ||
|
||
|
@@ -2290,14 +2323,23 @@ def _apply(self, func, **kwargs): | |
------- | ||
y : same type as input argument | ||
""" | ||
blocks, obj, index = self._create_blocks() | ||
blocks_dict, obj, index = self._create_blocks() | ||
dtypes = blocks_dict.keys() | ||
blocks = blocks_dict.values() | ||
|
||
results = [] | ||
for b in blocks: | ||
exclude = [] | ||
for dtype in list(dtypes): | ||
b = blocks_dict[dtype] | ||
try: | ||
values = self._prep_values(b.values) | ||
except TypeError: | ||
results.append(b.values.copy()) | ||
continue | ||
except (TypeError, NotImplementedError): | ||
if isinstance(obj, ABCDataFrame): | ||
exclude.extend(b.columns) | ||
del blocks_dict[dtype] | ||
continue | ||
else: | ||
raise DataError('No numeric types to aggregate') | ||
|
||
if values.size == 0: | ||
results.append(values.copy()) | ||
|
@@ -2316,7 +2358,7 @@ def func(arg): | |
|
||
results.append(np.apply_along_axis(func, self.axis, values)) | ||
|
||
return self._wrap_results(results, blocks, obj) | ||
return self._wrap_results(results, blocks, obj, exclude) | ||
|
||
@Substitution(name='ewm') | ||
@Appender(_doc_template) | ||
|
Uh oh!
There was an error while loading. Please reload this page.