Skip to content

Commit a43c157

Browse files
guillemborrelljreback
authored andcommitted
BUG: rolling.quantile does not return an interpolated result (#16247)
1 parent a5477b7 commit a43c157

File tree

5 files changed

+249
-8
lines changed

5 files changed

+249
-8
lines changed

asv_bench/benchmarks/rolling.py

+185
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
from .pandas_vb_common import *
2+
import pandas as pd
3+
import numpy as np
4+
5+
6+
class DataframeRolling(object):
    """asv timing benchmarks for rolling reductions on a one-column DataFrame.

    Every ``time_*`` method times a single rolling reduction.  Methods
    without a suffix use the small window (``self.wins``); methods ending
    in ``_l`` use the large window (``self.winl``).  ``corr`` and ``cov``
    run on the shorter frame ``self.dfs``.
    """

    goal_time = 0.2

    def setup(self):
        # Row counts of the frames used by the benchmarks.
        self.N = 100000
        self.Ns = 10000
        self.df = pd.DataFrame({'a': np.random.random(self.N)})
        self.dfs = pd.DataFrame({'a': np.random.random(self.Ns)})
        # Small and large rolling window sizes.
        self.wins = 10
        self.winl = 1000

    # ---- small window -------------------------------------------------

    def time_rolling_quantile_0(self):
        self.df.rolling(self.wins).quantile(0.0)

    def time_rolling_quantile_1(self):
        self.df.rolling(self.wins).quantile(1.0)

    def time_rolling_quantile_median(self):
        self.df.rolling(self.wins).quantile(0.5)

    def time_rolling_median(self):
        self.df.rolling(self.wins).median()

    # Previously also named ``time_rolling_median``, which silently
    # replaced the real median benchmark defined just above.
    def time_rolling_mean(self):
        self.df.rolling(self.wins).mean()

    def time_rolling_max(self):
        self.df.rolling(self.wins).max()

    def time_rolling_min(self):
        self.df.rolling(self.wins).min()

    def time_rolling_std(self):
        self.df.rolling(self.wins).std()

    def time_rolling_count(self):
        self.df.rolling(self.wins).count()

    def time_rolling_skew(self):
        self.df.rolling(self.wins).skew()

    def time_rolling_kurt(self):
        self.df.rolling(self.wins).kurt()

    def time_rolling_sum(self):
        self.df.rolling(self.wins).sum()

    def time_rolling_corr(self):
        self.dfs.rolling(self.wins).corr()

    def time_rolling_cov(self):
        self.dfs.rolling(self.wins).cov()

    # ---- large window -------------------------------------------------

    def time_rolling_quantile_0_l(self):
        self.df.rolling(self.winl).quantile(0.0)

    def time_rolling_quantile_1_l(self):
        self.df.rolling(self.winl).quantile(1.0)

    def time_rolling_quantile_median_l(self):
        self.df.rolling(self.winl).quantile(0.5)

    def time_rolling_median_l(self):
        self.df.rolling(self.winl).median()

    # Previously a second ``time_rolling_median_l`` that shadowed the
    # median benchmark above.
    def time_rolling_mean_l(self):
        self.df.rolling(self.winl).mean()

    def time_rolling_max_l(self):
        self.df.rolling(self.winl).max()

    def time_rolling_min_l(self):
        self.df.rolling(self.winl).min()

    # The five benchmarks below used ``self.wins`` by mistake, so they
    # merely repeated the small-window timings.
    def time_rolling_std_l(self):
        self.df.rolling(self.winl).std()

    def time_rolling_count_l(self):
        self.df.rolling(self.winl).count()

    def time_rolling_skew_l(self):
        self.df.rolling(self.winl).skew()

    def time_rolling_kurt_l(self):
        self.df.rolling(self.winl).kurt()

    def time_rolling_sum_l(self):
        self.df.rolling(self.winl).sum()
94+
95+
96+
class SeriesRolling(object):
    """asv timing benchmarks for rolling reductions on a Series.

    Every ``time_*`` method times a single rolling reduction.  Methods
    without a suffix use the small window (``self.wins``); methods ending
    in ``_l`` use the large window (``self.winl``).  ``corr`` and ``cov``
    run on the shorter series ``self.srs``.
    """

    goal_time = 0.2

    def setup(self):
        # Row counts of the frames the series are taken from.
        self.N = 100000
        self.Ns = 10000
        self.df = pd.DataFrame({'a': np.random.random(self.N)})
        self.dfs = pd.DataFrame({'a': np.random.random(self.Ns)})
        # The benchmarks operate on the single column of each frame.
        self.sr = self.df.a
        self.srs = self.dfs.a
        # Small and large rolling window sizes.
        self.wins = 10
        self.winl = 1000

    # ---- small window -------------------------------------------------

    def time_rolling_quantile_0(self):
        self.sr.rolling(self.wins).quantile(0.0)

    def time_rolling_quantile_1(self):
        self.sr.rolling(self.wins).quantile(1.0)

    def time_rolling_quantile_median(self):
        self.sr.rolling(self.wins).quantile(0.5)

    def time_rolling_median(self):
        self.sr.rolling(self.wins).median()

    # Previously also named ``time_rolling_median``, which silently
    # replaced the real median benchmark defined just above.
    def time_rolling_mean(self):
        self.sr.rolling(self.wins).mean()

    def time_rolling_max(self):
        self.sr.rolling(self.wins).max()

    def time_rolling_min(self):
        self.sr.rolling(self.wins).min()

    def time_rolling_std(self):
        self.sr.rolling(self.wins).std()

    def time_rolling_count(self):
        self.sr.rolling(self.wins).count()

    def time_rolling_skew(self):
        self.sr.rolling(self.wins).skew()

    def time_rolling_kurt(self):
        self.sr.rolling(self.wins).kurt()

    def time_rolling_sum(self):
        self.sr.rolling(self.wins).sum()

    def time_rolling_corr(self):
        self.srs.rolling(self.wins).corr()

    def time_rolling_cov(self):
        self.srs.rolling(self.wins).cov()

    # ---- large window -------------------------------------------------

    def time_rolling_quantile_0_l(self):
        self.sr.rolling(self.winl).quantile(0.0)

    def time_rolling_quantile_1_l(self):
        self.sr.rolling(self.winl).quantile(1.0)

    def time_rolling_quantile_median_l(self):
        self.sr.rolling(self.winl).quantile(0.5)

    def time_rolling_median_l(self):
        self.sr.rolling(self.winl).median()

    # Previously a second ``time_rolling_median_l`` that shadowed the
    # median benchmark above.
    def time_rolling_mean_l(self):
        self.sr.rolling(self.winl).mean()

    def time_rolling_max_l(self):
        self.sr.rolling(self.winl).max()

    def time_rolling_min_l(self):
        self.sr.rolling(self.winl).min()

    # The five benchmarks below used ``self.wins`` by mistake, so they
    # merely repeated the small-window timings.
    def time_rolling_std_l(self):
        self.sr.rolling(self.winl).std()

    def time_rolling_count_l(self):
        self.sr.rolling(self.winl).count()

    def time_rolling_skew_l(self):
        self.sr.rolling(self.winl).skew()

    def time_rolling_kurt_l(self):
        self.sr.rolling(self.winl).kurt()

    def time_rolling_sum_l(self):
        self.sr.rolling(self.winl).sum()

doc/source/whatsnew/v0.21.0.txt

+4-1
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,11 @@ Plotting
168168

169169
Groupby/Resample/Rolling
170170
^^^^^^^^^^^^^^^^^^^^^^^^
171-
- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)
172171

172+
- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)
173173
- Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`)
174+
- Bug in ``.rolling().quantile()`` which did not return an interpolated result and so incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`)
175+
174176

175177
Sparse
176178
^^^^^^
@@ -191,6 +193,7 @@ Categorical
191193
^^^^^^^^^^^
192194

193195

196+
194197
Other
195198
^^^^^
196199
- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)

pandas/_libs/window.pyx

+13-2
Original file line numberDiff line numberDiff line change
@@ -1348,8 +1348,9 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
13481348
bint is_variable
13491349
ndarray[int64_t] start, end
13501350
ndarray[double_t] output
1351+
double vlow, vhigh
13511352

1352-
if quantile < 0.0 or quantile > 1.0:
1353+
if quantile <= 0.0 or quantile >= 1.0:
13531354
raise ValueError("quantile value {0} not in [0, 1]".format(quantile))
13541355

13551356
# we use the Fixed/Variable Indexer here as the
@@ -1391,7 +1392,17 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
13911392

13921393
if nobs >= minp:
13931394
idx = int(quantile * <double>(nobs - 1))
1394-
output[i] = skiplist.get(idx)
1395+
1396+
# Single value in skip list
1397+
if nobs == 1:
1398+
output[i] = skiplist.get(0)
1399+
1400+
# Interpolated quantile
1401+
else:
1402+
vlow = skiplist.get(idx)
1403+
vhigh = skiplist.get(idx + 1)
1404+
output[i] = (vlow + (vhigh - vlow) *
1405+
(quantile * (nobs - 1) - idx))
13951406
else:
13961407
output[i] = NaN
13971408

pandas/core/window.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -975,8 +975,15 @@ def quantile(self, quantile, **kwargs):
975975

976976
def f(arg, *args, **kwargs):
977977
minp = _use_window(self.min_periods, window)
978-
return _window.roll_quantile(arg, window, minp, indexi,
979-
self.closed, quantile)
978+
if quantile == 1.0:
979+
return _window.roll_max(arg, window, minp, indexi,
980+
self.closed)
981+
elif quantile == 0.0:
982+
return _window.roll_min(arg, window, minp, indexi,
983+
self.closed)
984+
else:
985+
return _window.roll_quantile(arg, window, minp, indexi,
986+
self.closed, quantile)
980987

981988
return self._apply(f, 'quantile', quantile=quantile,
982989
**kwargs)

pandas/tests/test_window.py

+38-3
Original file line numberDiff line numberDiff line change
@@ -1122,8 +1122,19 @@ def test_rolling_quantile(self):
11221122
def scoreatpercentile(a, per):
11231123
values = np.sort(a, axis=0)
11241124

1125-
idx = per / 1. * (values.shape[0] - 1)
1126-
return values[int(idx)]
1125+
idx = int(per / 1. * (values.shape[0] - 1))
1126+
1127+
if idx == values.shape[0] - 1:
1128+
retval = values[-1]
1129+
1130+
else:
1131+
qlow = float(idx) / float(values.shape[0] - 1)
1132+
qhig = float(idx + 1) / float(values.shape[0] - 1)
1133+
vlow = values[idx]
1134+
vhig = values[idx + 1]
1135+
retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow)
1136+
1137+
return retval
11271138

11281139
for q in qs:
11291140

@@ -1138,6 +1149,30 @@ def alt(x):
11381149

11391150
self._check_moment_func(f, alt, name='quantile', quantile=q)
11401151

1152+
def test_rolling_quantile_np_percentile(self):
1153+
# #9413: Tests that rolling window's quantile default behavior
1154+
# is analogous to Numpy's percentile
1155+
row = 10
1156+
col = 5
1157+
idx = pd.date_range(20100101, periods=row, freq='B')
1158+
df = pd.DataFrame(np.random.rand(row * col).reshape((row, -1)),
1159+
index=idx)
1160+
1161+
df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0)
1162+
np_percentile = np.percentile(df, [25, 50, 75], axis=0)
1163+
1164+
tm.assert_almost_equal(df_quantile.values, np.array(np_percentile))
1165+
1166+
def test_rolling_quantile_series(self):
1167+
# #16211: Tests that rolling window's quantile default behavior
1168+
# is analogous to pd.Series' quantile
1169+
arr = np.arange(100)
1170+
s = pd.Series(arr)
1171+
q1 = s.quantile(0.1)
1172+
q2 = s.rolling(100).quantile(0.1).iloc[-1]
1173+
1174+
tm.assert_almost_equal(q1, q2)
1175+
11411176
def test_rolling_quantile_param(self):
11421177
ser = Series([0.0, .1, .5, .9, 1.0])
11431178

@@ -3558,7 +3593,7 @@ def test_ragged_quantile(self):
35583593

35593594
result = df.rolling(window='2s', min_periods=1).quantile(0.5)
35603595
expected = df.copy()
3561-
expected['B'] = [0.0, 1, 1.0, 3.0, 3.0]
3596+
expected['B'] = [0.0, 1, 1.5, 3.0, 3.5]
35623597
tm.assert_frame_equal(result, expected)
35633598

35643599
def test_ragged_std(self):

0 commit comments

Comments
 (0)