Skip to content

BUG/ENH: Add how kwarg to rolling_* functions [fix #6297] #6845

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 22, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion doc/source/computation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,11 @@ accept the following arguments:
or :ref:`DateOffset <timeseries.offsets>` to pre-conform the data to.
Note that prior to pandas v0.8.0, a keyword argument ``time_rule`` was used
instead of ``freq`` that referred to the legacy time rule constants

- ``how``: optionally specify method for down or re-sampling. Default is
is min for ``rolling_min``, max for ``rolling_max``, median for
``rolling_median``, and mean for all other rolling functions. See
:meth:`DataFrame.resample`'s how argument for more information.

These functions can be applied to ndarrays or Series objects:

.. ipython:: python
Expand Down
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,8 @@ Improvements to existing features
- ``Float64Index`` is now backed by a ``float64`` dtype ndarray instead of an
``object`` dtype array (:issue:`6471`).
- Add option to turn off escaping in ``DataFrame.to_latex`` (:issue:`6472`)
- Added ``how`` option to rolling-moment functions to dictate how to handle resampling; :func:``rolling_max`` defaults to max,
:func:``rolling_min`` defaults to min, and all others default to mean (:issue:`6297`)

.. _release.bug_fixes-0.14.0:

Expand Down
2 changes: 2 additions & 0 deletions doc/source/v0.14.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,8 @@ Enhancements
- ``Float64Index`` is now backed by a ``float64`` dtype ndarray instead of an
``object`` dtype array (:issue:`6471`).
- Implemented ``Panel.pct_change`` (:issue:`6904`)
- Added ``how`` option to rolling-moment functions to dictate how to handle resampling; :func:``rolling_max`` defaults to max,
:func:``rolling_min`` defaults to min, and all others default to mean (:issue:`6297`)

Performance
~~~~~~~~~~~
Expand Down
85 changes: 51 additions & 34 deletions pandas/stats/moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@
as a frequency string or DateOffset object.
center : boolean, default False
Set the labels at the center of the window.
how : string, default '%s'
Method for down- or re-sampling
"""

_roll_notes = r"""
Expand Down Expand Up @@ -85,6 +87,8 @@
adjust : boolean, default True
Divide by decaying adjustment factor in beginning periods to account for
imbalance in relative weightings (viewing EWMA as a moving average)
how : string, default 'mean'
Method for down- or re-sampling
"""

_ewm_notes = r"""
Expand Down Expand Up @@ -148,7 +152,7 @@
"""


def rolling_count(arg, window, freq=None, center=False):
def rolling_count(arg, window, freq=None, center=False, how=None):
"""
Rolling count of number of non-NaN observations inside provided window.

Expand All @@ -163,6 +167,8 @@ def rolling_count(arg, window, freq=None, center=False):
as a frequency string or DateOffset object.
center : boolean, default False
Whether the label should correspond with center of window
how : string, default 'mean'
Method for down- or re-sampling

Returns
-------
Expand All @@ -174,7 +180,7 @@ def rolling_count(arg, window, freq=None, center=False):
frequency by resampling the data. This is done with the default parameters
of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
"""
arg = _conv_timerule(arg, freq)
arg = _conv_timerule(arg, freq, how)
window = min(window, len(arg))

return_hook, values = _process_data_structure(arg, kill_inf=False)
Expand All @@ -190,19 +196,19 @@ def rolling_count(arg, window, freq=None, center=False):


@Substitution("Unbiased moving covariance.", _binary_arg_flex,
_roll_kw+_pairwise_kw, _flex_retval, _roll_notes)
_roll_kw%'None'+_pairwise_kw, _flex_retval, _roll_notes)
@Appender(_doc_template)
def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None,
center=False, pairwise=None):
center=False, pairwise=None, how=None):
if window is None and isinstance(arg2, (int, float)):
window = arg2
arg2 = arg1
pairwise = True if pairwise is None else pairwise # only default unset
elif arg2 is None:
arg2 = arg1
pairwise = True if pairwise is None else pairwise # only default unset
arg1 = _conv_timerule(arg1, freq)
arg2 = _conv_timerule(arg2, freq)
arg1 = _conv_timerule(arg1, freq, how)
arg2 = _conv_timerule(arg2, freq, how)
window = min(window, len(arg1), len(arg2))

def _get_cov(X, Y):
Expand All @@ -215,19 +221,19 @@ def _get_cov(X, Y):


@Substitution("Moving sample correlation.", _binary_arg_flex,
_roll_kw+_pairwise_kw, _flex_retval, _roll_notes)
_roll_kw%'None'+_pairwise_kw, _flex_retval, _roll_notes)
@Appender(_doc_template)
def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None,
center=False, pairwise=None):
center=False, pairwise=None, how=None):
if window is None and isinstance(arg2, (int, float)):
window = arg2
arg2 = arg1
pairwise = True if pairwise is None else pairwise # only default unset
elif arg2 is None:
arg2 = arg1
pairwise = True if pairwise is None else pairwise # only default unset
arg1 = _conv_timerule(arg1, freq)
arg2 = _conv_timerule(arg2, freq)
arg1 = _conv_timerule(arg1, freq, how)
arg2 = _conv_timerule(arg2, freq, how)
window = min(window, len(arg1), len(arg2))

def _get_corr(a, b):
Expand Down Expand Up @@ -289,7 +295,7 @@ def _flex_binary_moment(arg1, arg2, f, pairwise=False):

@Substitution("Deprecated. Use rolling_corr(..., pairwise=True) instead.\n\n"
"Pairwise moving sample correlation", _pairwise_arg,
_roll_kw, _pairwise_retval, _roll_notes)
_roll_kw%'None', _pairwise_retval, _roll_notes)
@Appender(_doc_template)
def rolling_corr_pairwise(df1, df2=None, window=None, min_periods=None,
freq=None, center=False):
Expand All @@ -301,7 +307,7 @@ def rolling_corr_pairwise(df1, df2=None, window=None, min_periods=None,


def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
args=(), kwargs={}, **kwds):
how=None, args=(), kwargs={}, **kwds):
"""
Rolling statistical measure using supplied function. Designed to be
used with passed-in Cython array-based functions.
Expand All @@ -318,6 +324,8 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
Frequency to conform to before computing statistic
center : boolean, default False
Whether the label should correspond with center of window
how : string, default 'mean'
Method for down- or re-sampling
args : tuple
Passed on to func
kwargs : dict
Expand All @@ -327,7 +335,7 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
-------
y : type of input
"""
arg = _conv_timerule(arg, freq)
arg = _conv_timerule(arg, freq, how)
calc = lambda x: func(x, window, minp=minp, args=args, kwargs=kwargs,
**kwds)
return_hook, values = _process_data_structure(arg)
Expand Down Expand Up @@ -413,9 +421,9 @@ def _get_center_of_mass(com, span, halflife):
_type_of_input_retval, _ewm_notes)
@Appender(_doc_template)
def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None,
adjust=True):
adjust=True, how=None):
com = _get_center_of_mass(com, span, halflife)
arg = _conv_timerule(arg, freq)
arg = _conv_timerule(arg, freq, how)

def _ewma(v):
result = algos.ewma(v, com, int(adjust))
Expand All @@ -437,9 +445,9 @@ def _first_valid_index(arr):
_ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes)
@Appender(_doc_template)
def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
freq=None):
freq=None, how=None):
com = _get_center_of_mass(com, span, halflife)
arg = _conv_timerule(arg, freq)
arg = _conv_timerule(arg, freq, how)
moment2nd = ewma(arg * arg, com=com, min_periods=min_periods)
moment1st = ewma(arg, com=com, min_periods=min_periods)

Expand All @@ -465,16 +473,16 @@ def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False):
_ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes)
@Appender(_doc_template)
def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
bias=False, freq=None, pairwise=None):
bias=False, freq=None, pairwise=None, how=None):
if arg2 is None:
arg2 = arg1
pairwise = True if pairwise is None else pairwise
elif isinstance(arg2, (int, float)) and com is None:
com = arg2
arg2 = arg1
pairwise = True if pairwise is None else pairwise
arg1 = _conv_timerule(arg1, freq)
arg2 = _conv_timerule(arg2, freq)
arg1 = _conv_timerule(arg1, freq, how)
arg2 = _conv_timerule(arg2, freq, how)

def _get_ewmcov(X, Y):
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)
Expand All @@ -492,16 +500,16 @@ def _get_ewmcov(X, Y):
_ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes)
@Appender(_doc_template)
def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
freq=None, pairwise=None):
freq=None, pairwise=None, how=None):
if arg2 is None:
arg2 = arg1
pairwise = True if pairwise is None else pairwise
elif isinstance(arg2, (int, float)) and com is None:
com = arg2
arg2 = arg1
pairwise = True if pairwise is None else pairwise
arg1 = _conv_timerule(arg1, freq)
arg2 = _conv_timerule(arg2, freq)
arg1 = _conv_timerule(arg1, freq, how)
arg2 = _conv_timerule(arg2, freq, how)

def _get_ewmcorr(X, Y):
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)
Expand Down Expand Up @@ -541,12 +549,12 @@ def _prep_binary(arg1, arg2):
# Python interface to Cython functions


def _conv_timerule(arg, freq):
def _conv_timerule(arg, freq, how):

types = (DataFrame, Series)
if freq is not None and isinstance(arg, types):
# Conform to whatever frequency needed.
arg = arg.resample(freq)
arg = arg.resample(freq, how=how)

return arg

Expand All @@ -567,25 +575,32 @@ def _use_window(minp, window):
return minp


def _rolling_func(func, desc, check_minp=_use_window):
@Substitution(desc, _unary_arg, _roll_kw, _type_of_input_retval, _roll_notes)
def _rolling_func(func, desc, check_minp=_use_window, how=None):
if how is None:
how_arg_str = 'None'
else:
how_arg_str = "'%s"%how

@Substitution(desc, _unary_arg, _roll_kw%how_arg_str, _type_of_input_retval,
_roll_notes)
@Appender(_doc_template)
@wraps(func)
def f(arg, window, min_periods=None, freq=None, center=False,
def f(arg, window, min_periods=None, freq=None, center=False, how=how,
**kwargs):
def call_cython(arg, window, minp, args=(), kwargs={}, **kwds):
minp = check_minp(minp, window)
return func(arg, window, minp, **kwds)
return _rolling_moment(arg, window, call_cython, min_periods, freq=freq,
center=center, **kwargs)
center=center, how=how, **kwargs)

return f

rolling_max = _rolling_func(algos.roll_max2, 'Moving maximum.')
rolling_min = _rolling_func(algos.roll_min2, 'Moving minimum.')
rolling_max = _rolling_func(algos.roll_max2, 'Moving maximum.', how='max')
rolling_min = _rolling_func(algos.roll_min2, 'Moving minimum.', how='min')
rolling_sum = _rolling_func(algos.roll_sum, 'Moving sum.')
rolling_mean = _rolling_func(algos.roll_mean, 'Moving mean.')
rolling_median = _rolling_func(algos.roll_median_cython, 'Moving median.')
rolling_median = _rolling_func(algos.roll_median_cython, 'Moving median.',
how='median')

_ts_std = lambda *a, **kw: _zsqrt(algos.roll_var(*a, **kw))
rolling_std = _rolling_func(_ts_std, 'Unbiased moving standard deviation.',
Expand Down Expand Up @@ -687,7 +702,7 @@ def call_cython(arg, window, minp, args, kwargs):

def rolling_window(arg, window=None, win_type=None, min_periods=None,
freq=None, center=False, mean=True,
axis=0, **kwargs):
axis=0, how=None, **kwargs):
"""
Applies a moving window of type ``window_type`` and size ``window``
on the data.
Expand All @@ -711,6 +726,8 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None,
mean : boolean, default True
If True computes weighted mean, else weighted sum
axis : {0, 1}, default 0
how : string, default 'mean'
Method for down- or re-sampling

Returns
-------
Expand Down Expand Up @@ -761,7 +778,7 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None,

minp = _use_window(min_periods, len(window))

arg = _conv_timerule(arg, freq)
arg = _conv_timerule(arg, freq, how)
return_hook, values = _process_data_structure(arg)

f = lambda x: algos.roll_window(x, window, minp, avg=mean)
Expand Down
Loading