Skip to content

Commit fbd2343

Browse files
committed
Add how kwarg to rolling_* functions [fix #6297]
1 parent 95090fd commit fbd2343

File tree

5 files changed

+151
-35
lines changed

5 files changed

+151
-35
lines changed

doc/source/computation.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,11 @@ accept the following arguments:
240240
or :ref:`DateOffset <timeseries.offsets>` to pre-conform the data to.
241241
Note that prior to pandas v0.8.0, a keyword argument ``time_rule`` was used
242242
instead of ``freq`` that referred to the legacy time rule constants
243-
243+
- ``how``: optionally specify method for down- or re-sampling. Default is
244+
min for ``rolling_min``, max for ``rolling_max``, median for
245+
``rolling_median``, and mean for all other rolling functions. See
246+
:meth:`DataFrame.resample`'s ``how`` argument for more information.
247+
244248
These functions can be applied to ndarrays or Series objects:
245249

246250
.. ipython:: python

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,8 @@ Improvements to existing features
265265
- ``Float64Index`` is now backed by a ``float64`` dtype ndarray instead of an
266266
``object`` dtype array (:issue:`6471`).
267267
- Add option to turn off escaping in ``DataFrame.to_latex`` (:issue:`6472`)
268+
- Added ``how`` option to rolling-moment functions to dictate how to handle resampling; :func:`rolling_max` defaults to max,
269+
:func:`rolling_min` defaults to min, :func:`rolling_median` defaults to median, and all others default to mean (:issue:`6297`)
268270

269271
.. _release.bug_fixes-0.14.0:
270272

doc/source/v0.14.0.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,8 @@ Enhancements
471471
- ``Float64Index`` is now backed by a ``float64`` dtype ndarray instead of an
472472
``object`` dtype array (:issue:`6471`).
473473
- Implemented ``Panel.pct_change`` (:issue:`6904`)
474+
- Added ``how`` option to rolling-moment functions to dictate how to handle resampling; :func:`rolling_max` defaults to max,
475+
:func:`rolling_min` defaults to min, :func:`rolling_median` defaults to median, and all others default to mean (:issue:`6297`)
474476

475477
Performance
476478
~~~~~~~~~~~

pandas/stats/moments.py

Lines changed: 51 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@
5757
as a frequency string or DateOffset object.
5858
center : boolean, default False
5959
Set the labels at the center of the window.
60+
how : string, default '%s'
61+
Method for down- or re-sampling
6062
"""
6163

6264
_roll_notes = r"""
@@ -85,6 +87,8 @@
8587
adjust : boolean, default True
8688
Divide by decaying adjustment factor in beginning periods to account for
8789
imbalance in relative weightings (viewing EWMA as a moving average)
90+
how : string, default 'mean'
91+
Method for down- or re-sampling
8892
"""
8993

9094
_ewm_notes = r"""
@@ -148,7 +152,7 @@
148152
"""
149153

150154

151-
def rolling_count(arg, window, freq=None, center=False):
155+
def rolling_count(arg, window, freq=None, center=False, how=None):
152156
"""
153157
Rolling count of number of non-NaN observations inside provided window.
154158
@@ -163,6 +167,8 @@ def rolling_count(arg, window, freq=None, center=False):
163167
as a frequency string or DateOffset object.
164168
center : boolean, default False
165169
Whether the label should correspond with center of window
170+
how : string, default 'mean'
171+
Method for down- or re-sampling
166172
167173
Returns
168174
-------
@@ -174,7 +180,7 @@ def rolling_count(arg, window, freq=None, center=False):
174180
frequency by resampling the data. This is done with the default parameters
175181
of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
176182
"""
177-
arg = _conv_timerule(arg, freq)
183+
arg = _conv_timerule(arg, freq, how)
178184
window = min(window, len(arg))
179185

180186
return_hook, values = _process_data_structure(arg, kill_inf=False)
@@ -190,19 +196,19 @@ def rolling_count(arg, window, freq=None, center=False):
190196

191197

192198
@Substitution("Unbiased moving covariance.", _binary_arg_flex,
193-
_roll_kw+_pairwise_kw, _flex_retval, _roll_notes)
199+
_roll_kw%'None'+_pairwise_kw, _flex_retval, _roll_notes)
194200
@Appender(_doc_template)
195201
def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None,
196-
center=False, pairwise=None):
202+
center=False, pairwise=None, how=None):
197203
if window is None and isinstance(arg2, (int, float)):
198204
window = arg2
199205
arg2 = arg1
200206
pairwise = True if pairwise is None else pairwise # only default unset
201207
elif arg2 is None:
202208
arg2 = arg1
203209
pairwise = True if pairwise is None else pairwise # only default unset
204-
arg1 = _conv_timerule(arg1, freq)
205-
arg2 = _conv_timerule(arg2, freq)
210+
arg1 = _conv_timerule(arg1, freq, how)
211+
arg2 = _conv_timerule(arg2, freq, how)
206212
window = min(window, len(arg1), len(arg2))
207213

208214
def _get_cov(X, Y):
@@ -215,19 +221,19 @@ def _get_cov(X, Y):
215221

216222

217223
@Substitution("Moving sample correlation.", _binary_arg_flex,
218-
_roll_kw+_pairwise_kw, _flex_retval, _roll_notes)
224+
_roll_kw%'None'+_pairwise_kw, _flex_retval, _roll_notes)
219225
@Appender(_doc_template)
220226
def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None,
221-
center=False, pairwise=None):
227+
center=False, pairwise=None, how=None):
222228
if window is None and isinstance(arg2, (int, float)):
223229
window = arg2
224230
arg2 = arg1
225231
pairwise = True if pairwise is None else pairwise # only default unset
226232
elif arg2 is None:
227233
arg2 = arg1
228234
pairwise = True if pairwise is None else pairwise # only default unset
229-
arg1 = _conv_timerule(arg1, freq)
230-
arg2 = _conv_timerule(arg2, freq)
235+
arg1 = _conv_timerule(arg1, freq, how)
236+
arg2 = _conv_timerule(arg2, freq, how)
231237
window = min(window, len(arg1), len(arg2))
232238

233239
def _get_corr(a, b):
@@ -289,7 +295,7 @@ def _flex_binary_moment(arg1, arg2, f, pairwise=False):
289295

290296
@Substitution("Deprecated. Use rolling_corr(..., pairwise=True) instead.\n\n"
291297
"Pairwise moving sample correlation", _pairwise_arg,
292-
_roll_kw, _pairwise_retval, _roll_notes)
298+
_roll_kw%'None', _pairwise_retval, _roll_notes)
293299
@Appender(_doc_template)
294300
def rolling_corr_pairwise(df1, df2=None, window=None, min_periods=None,
295301
freq=None, center=False):
@@ -301,7 +307,7 @@ def rolling_corr_pairwise(df1, df2=None, window=None, min_periods=None,
301307

302308

303309
def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
304-
args=(), kwargs={}, **kwds):
310+
how=None, args=(), kwargs={}, **kwds):
305311
"""
306312
Rolling statistical measure using supplied function. Designed to be
307313
used with passed-in Cython array-based functions.
@@ -318,6 +324,8 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
318324
Frequency to conform to before computing statistic
319325
center : boolean, default False
320326
Whether the label should correspond with center of window
327+
how : string, default 'mean'
328+
Method for down- or re-sampling
321329
args : tuple
322330
Passed on to func
323331
kwargs : dict
@@ -327,7 +335,7 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
327335
-------
328336
y : type of input
329337
"""
330-
arg = _conv_timerule(arg, freq)
338+
arg = _conv_timerule(arg, freq, how)
331339
calc = lambda x: func(x, window, minp=minp, args=args, kwargs=kwargs,
332340
**kwds)
333341
return_hook, values = _process_data_structure(arg)
@@ -413,9 +421,9 @@ def _get_center_of_mass(com, span, halflife):
413421
_type_of_input_retval, _ewm_notes)
414422
@Appender(_doc_template)
415423
def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None,
416-
adjust=True):
424+
adjust=True, how=None):
417425
com = _get_center_of_mass(com, span, halflife)
418-
arg = _conv_timerule(arg, freq)
426+
arg = _conv_timerule(arg, freq, how)
419427

420428
def _ewma(v):
421429
result = algos.ewma(v, com, int(adjust))
@@ -437,9 +445,9 @@ def _first_valid_index(arr):
437445
_ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes)
438446
@Appender(_doc_template)
439447
def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False,
440-
freq=None):
448+
freq=None, how=None):
441449
com = _get_center_of_mass(com, span, halflife)
442-
arg = _conv_timerule(arg, freq)
450+
arg = _conv_timerule(arg, freq, how)
443451
moment2nd = ewma(arg * arg, com=com, min_periods=min_periods)
444452
moment1st = ewma(arg, com=com, min_periods=min_periods)
445453

@@ -465,16 +473,16 @@ def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False):
465473
_ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes)
466474
@Appender(_doc_template)
467475
def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
468-
bias=False, freq=None, pairwise=None):
476+
bias=False, freq=None, pairwise=None, how=None):
469477
if arg2 is None:
470478
arg2 = arg1
471479
pairwise = True if pairwise is None else pairwise
472480
elif isinstance(arg2, (int, float)) and com is None:
473481
com = arg2
474482
arg2 = arg1
475483
pairwise = True if pairwise is None else pairwise
476-
arg1 = _conv_timerule(arg1, freq)
477-
arg2 = _conv_timerule(arg2, freq)
484+
arg1 = _conv_timerule(arg1, freq, how)
485+
arg2 = _conv_timerule(arg2, freq, how)
478486

479487
def _get_ewmcov(X, Y):
480488
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)
@@ -492,16 +500,16 @@ def _get_ewmcov(X, Y):
492500
_ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes)
493501
@Appender(_doc_template)
494502
def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0,
495-
freq=None, pairwise=None):
503+
freq=None, pairwise=None, how=None):
496504
if arg2 is None:
497505
arg2 = arg1
498506
pairwise = True if pairwise is None else pairwise
499507
elif isinstance(arg2, (int, float)) and com is None:
500508
com = arg2
501509
arg2 = arg1
502510
pairwise = True if pairwise is None else pairwise
503-
arg1 = _conv_timerule(arg1, freq)
504-
arg2 = _conv_timerule(arg2, freq)
511+
arg1 = _conv_timerule(arg1, freq, how)
512+
arg2 = _conv_timerule(arg2, freq, how)
505513

506514
def _get_ewmcorr(X, Y):
507515
mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods)
@@ -541,12 +549,12 @@ def _prep_binary(arg1, arg2):
541549
# Python interface to Cython functions
542550

543551

544-
def _conv_timerule(arg, freq):
552+
def _conv_timerule(arg, freq, how):
545553

546554
types = (DataFrame, Series)
547555
if freq is not None and isinstance(arg, types):
548556
# Conform to whatever frequency needed.
549-
arg = arg.resample(freq)
557+
arg = arg.resample(freq, how=how)
550558

551559
return arg
552560

@@ -567,25 +575,32 @@ def _use_window(minp, window):
567575
return minp
568576

569577

570-
def _rolling_func(func, desc, check_minp=_use_window):
571-
@Substitution(desc, _unary_arg, _roll_kw, _type_of_input_retval, _roll_notes)
578+
def _rolling_func(func, desc, check_minp=_use_window, how=None):
579+
if how is None:
580+
how_arg_str = 'None'
581+
else:
582+
how_arg_str = "'%s"%how
583+
584+
@Substitution(desc, _unary_arg, _roll_kw%how_arg_str, _type_of_input_retval,
585+
_roll_notes)
572586
@Appender(_doc_template)
573587
@wraps(func)
574-
def f(arg, window, min_periods=None, freq=None, center=False,
588+
def f(arg, window, min_periods=None, freq=None, center=False, how=how,
575589
**kwargs):
576590
def call_cython(arg, window, minp, args=(), kwargs={}, **kwds):
577591
minp = check_minp(minp, window)
578592
return func(arg, window, minp, **kwds)
579593
return _rolling_moment(arg, window, call_cython, min_periods, freq=freq,
580-
center=center, **kwargs)
594+
center=center, how=how, **kwargs)
581595

582596
return f
583597

584-
rolling_max = _rolling_func(algos.roll_max2, 'Moving maximum.')
585-
rolling_min = _rolling_func(algos.roll_min2, 'Moving minimum.')
598+
rolling_max = _rolling_func(algos.roll_max2, 'Moving maximum.', how='max')
599+
rolling_min = _rolling_func(algos.roll_min2, 'Moving minimum.', how='min')
586600
rolling_sum = _rolling_func(algos.roll_sum, 'Moving sum.')
587601
rolling_mean = _rolling_func(algos.roll_mean, 'Moving mean.')
588-
rolling_median = _rolling_func(algos.roll_median_cython, 'Moving median.')
602+
rolling_median = _rolling_func(algos.roll_median_cython, 'Moving median.',
603+
how='median')
589604

590605
_ts_std = lambda *a, **kw: _zsqrt(algos.roll_var(*a, **kw))
591606
rolling_std = _rolling_func(_ts_std, 'Unbiased moving standard deviation.',
@@ -687,7 +702,7 @@ def call_cython(arg, window, minp, args, kwargs):
687702

688703
def rolling_window(arg, window=None, win_type=None, min_periods=None,
689704
freq=None, center=False, mean=True,
690-
axis=0, **kwargs):
705+
axis=0, how=None, **kwargs):
691706
"""
692707
Applies a moving window of type ``window_type`` and size ``window``
693708
on the data.
@@ -711,6 +726,8 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None,
711726
mean : boolean, default True
712727
If True computes weighted mean, else weighted sum
713728
axis : {0, 1}, default 0
729+
how : string, default 'mean'
730+
Method for down- or re-sampling
714731
715732
Returns
716733
-------
@@ -761,7 +778,7 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None,
761778

762779
minp = _use_window(min_periods, len(window))
763780

764-
arg = _conv_timerule(arg, freq)
781+
arg = _conv_timerule(arg, freq, how)
765782
return_hook, values = _process_data_structure(arg)
766783

767784
f = lambda x: algos.roll_window(x, window, minp, avg=mean)

0 commit comments

Comments
 (0)