Skip to content

Commit 823bf6c

Browse files
fujiaxiang authored and jreback committed
BUG: Series rolling count ignores min_periods (#30923)
1 parent d8b30c9 commit 823bf6c

File tree

7 files changed

+89
-32
lines changed

7 files changed

+89
-32
lines changed

doc/source/whatsnew/v1.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,7 @@ Groupby/resample/rolling
11651165
- Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
11661166
- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`)
11671167
- Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`)
1168+
- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` where the ``min_periods`` argument was ignored (:issue:`26996`)
11681169

11691170
Reshaping
11701171
^^^^^^^^^

pandas/core/window/rolling.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,17 +1182,13 @@ class _Rolling_and_Expanding(_Rolling):
11821182
def count(self):
11831183

11841184
blocks, obj = self._create_blocks()
1185-
1186-
window = self._get_window()
1187-
window = min(window, len(obj)) if not self.center else window
1188-
11891185
results = []
11901186
for b in blocks:
11911187
result = b.notna().astype(int)
11921188
result = self._constructor(
11931189
result,
1194-
window=window,
1195-
min_periods=0,
1190+
window=self._get_window(),
1191+
min_periods=self.min_periods or 0,
11961192
center=self.center,
11971193
axis=self.axis,
11981194
closed=self.closed,
@@ -1657,7 +1653,11 @@ def _get_cov(X, Y):
16571653
mean = lambda x: x.rolling(
16581654
window, self.min_periods, center=self.center
16591655
).mean(**kwargs)
1660-
count = (X + Y).rolling(window=window, center=self.center).count(**kwargs)
1656+
count = (
1657+
(X + Y)
1658+
.rolling(window=window, min_periods=0, center=self.center)
1659+
.count(**kwargs)
1660+
)
16611661
bias_adj = count / (count - ddof)
16621662
return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
16631663

pandas/tests/window/moments/test_moments_expanding.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ def test_expanding_corr(self):
4040
tm.assert_almost_equal(rolling_result, result)
4141

4242
def test_expanding_count(self):
43-
result = self.series.expanding().count()
43+
result = self.series.expanding(min_periods=0).count()
4444
tm.assert_almost_equal(
45-
result, self.series.rolling(window=len(self.series)).count()
45+
result, self.series.rolling(window=len(self.series), min_periods=0).count()
4646
)
4747

4848
def test_expanding_quantile(self):
@@ -369,7 +369,7 @@ def test_expanding_consistency(self, min_periods):
369369
)
370370
self._test_moments_consistency(
371371
min_periods=min_periods,
372-
count=lambda x: x.expanding().count(),
372+
count=lambda x: x.expanding(min_periods=min_periods).count(),
373373
mean=lambda x: x.expanding(min_periods=min_periods).mean(),
374374
corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y),
375375
var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(),

pandas/tests/window/moments/test_moments_rolling.py

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -777,8 +777,8 @@ def get_result(obj, window, min_periods=None, center=False):
777777
series_result = get_result(series, window=win, min_periods=minp)
778778
frame_result = get_result(frame, window=win, min_periods=minp)
779779
else:
780-
series_result = get_result(series, window=win)
781-
frame_result = get_result(frame, window=win)
780+
series_result = get_result(series, window=win, min_periods=0)
781+
frame_result = get_result(frame, window=win, min_periods=0)
782782

783783
last_date = series_result.index[-1]
784784
prev_date = last_date - 24 * offsets.BDay()
@@ -835,8 +835,8 @@ def get_result(obj, window, min_periods=None, center=False):
835835
nan_mask = ~nan_mask
836836
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
837837
else:
838-
result = get_result(self.series, len(self.series) + 1)
839-
expected = get_result(self.series, len(self.series))
838+
result = get_result(self.series, len(self.series) + 1, min_periods=0)
839+
expected = get_result(self.series, len(self.series), min_periods=0)
840840
nan_mask = isna(result)
841841
tm.assert_series_equal(nan_mask, isna(expected))
842842

@@ -851,10 +851,11 @@ def get_result(obj, window, min_periods=None, center=False):
851851
pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15
852852
)[9:].reset_index(drop=True)
853853
else:
854-
result = get_result(obj, 20, center=True)
855-
expected = get_result(pd.concat([obj, Series([np.NaN] * 9)]), 20)[
856-
9:
857-
].reset_index(drop=True)
854+
result = get_result(obj, 20, min_periods=0, center=True)
855+
print(result)
856+
expected = get_result(
857+
pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0
858+
)[9:].reset_index(drop=True)
858859

859860
tm.assert_series_equal(result, expected)
860861

@@ -893,21 +894,27 @@ def get_result(obj, window, min_periods=None, center=False):
893894
else:
894895
series_xp = (
895896
get_result(
896-
self.series.reindex(list(self.series.index) + s), window=25
897+
self.series.reindex(list(self.series.index) + s),
898+
window=25,
899+
min_periods=0,
897900
)
898901
.shift(-12)
899902
.reindex(self.series.index)
900903
)
901904
frame_xp = (
902905
get_result(
903-
self.frame.reindex(list(self.frame.index) + s), window=25
906+
self.frame.reindex(list(self.frame.index) + s),
907+
window=25,
908+
min_periods=0,
904909
)
905910
.shift(-12)
906911
.reindex(self.frame.index)
907912
)
908913

909-
series_rs = get_result(self.series, window=25, center=True)
910-
frame_rs = get_result(self.frame, window=25, center=True)
914+
series_rs = get_result(
915+
self.series, window=25, min_periods=0, center=True
916+
)
917+
frame_rs = get_result(self.frame, window=25, min_periods=0, center=True)
911918

912919
if fill_value is not None:
913920
series_xp = series_xp.fillna(fill_value)
@@ -964,7 +971,11 @@ def test_rolling_consistency(self, window, min_periods, center):
964971

965972
self._test_moments_consistency_is_constant(
966973
min_periods=min_periods,
967-
count=lambda x: (x.rolling(window=window, center=center).count()),
974+
count=lambda x: (
975+
x.rolling(
976+
window=window, min_periods=min_periods, center=center
977+
).count()
978+
),
968979
mean=lambda x: (
969980
x.rolling(
970981
window=window, min_periods=min_periods, center=center
@@ -989,19 +1000,26 @@ def test_rolling_consistency(self, window, min_periods, center):
9891000
).var(ddof=0)
9901001
),
9911002
var_debiasing_factors=lambda x: (
992-
x.rolling(window=window, center=center)
1003+
x.rolling(window=window, min_periods=min_periods, center=center)
9931004
.count()
9941005
.divide(
995-
(x.rolling(window=window, center=center).count() - 1.0).replace(
996-
0.0, np.nan
997-
)
1006+
(
1007+
x.rolling(
1008+
window=window, min_periods=min_periods, center=center
1009+
).count()
1010+
- 1.0
1011+
).replace(0.0, np.nan)
9981012
)
9991013
),
10001014
)
10011015

10021016
self._test_moments_consistency(
10031017
min_periods=min_periods,
1004-
count=lambda x: (x.rolling(window=window, center=center).count()),
1018+
count=lambda x: (
1019+
x.rolling(
1020+
window=window, min_periods=min_periods, center=center
1021+
).count()
1022+
),
10051023
mean=lambda x: (
10061024
x.rolling(
10071025
window=window, min_periods=min_periods, center=center
@@ -1071,7 +1089,7 @@ def test_rolling_consistency(self, window, min_periods, center):
10711089
if name == "count":
10721090
rolling_f_result = rolling_f()
10731091
rolling_apply_f_result = x.rolling(
1074-
window=window, min_periods=0, center=center
1092+
window=window, min_periods=min_periods, center=center
10751093
).apply(func=f, raw=True)
10761094
else:
10771095
if name in ["cov", "corr"]:

pandas/tests/window/test_api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,10 +237,10 @@ def test_count_nonnumeric_types(self):
237237
columns=cols,
238238
)
239239

240-
result = df.rolling(window=2).count()
240+
result = df.rolling(window=2, min_periods=0).count()
241241
tm.assert_frame_equal(result, expected)
242242

243-
result = df.rolling(1).count()
243+
result = df.rolling(1, min_periods=0).count()
244244
expected = df.notna().astype(float)
245245
tm.assert_frame_equal(result, expected)
246246

pandas/tests/window/test_expanding.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,22 @@ def test_expanding_axis(self, axis_frame):
113113

114114
result = df.expanding(3, axis=axis_frame).sum()
115115
tm.assert_frame_equal(result, expected)
116+
117+
118+
@pytest.mark.parametrize("constructor", [Series, DataFrame])
119+
def test_expanding_count_with_min_periods(constructor):
120+
# GH 26996
121+
result = constructor(range(5)).expanding(min_periods=3).count()
122+
expected = constructor([np.nan, np.nan, 3.0, 4.0, 5.0])
123+
tm.assert_equal(result, expected)
124+
125+
126+
@pytest.mark.parametrize("constructor", [Series, DataFrame])
127+
def test_expanding_count_default_min_periods_with_null_values(constructor):
128+
# GH 26996
129+
values = [1, 2, 3, np.nan, 4, 5, 6]
130+
expected_counts = [1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0]
131+
132+
result = constructor(values).expanding().count()
133+
expected = constructor(expected_counts)
134+
tm.assert_equal(result, expected)

pandas/tests/window/test_rolling.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ def test_rolling_axis_count(self, axis_frame):
344344
else:
345345
expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]})
346346

347-
result = df.rolling(2, axis=axis_frame).count()
347+
result = df.rolling(2, axis=axis_frame, min_periods=0).count()
348348
tm.assert_frame_equal(result, expected)
349349

350350
def test_readonly_array(self):
@@ -446,3 +446,22 @@ def test_min_periods1():
446446
result = df["a"].rolling(3, center=True, min_periods=1).max()
447447
expected = pd.Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a")
448448
tm.assert_series_equal(result, expected)
449+
450+
451+
@pytest.mark.parametrize("constructor", [Series, DataFrame])
452+
def test_rolling_count_with_min_periods(constructor):
453+
# GH 26996
454+
result = constructor(range(5)).rolling(3, min_periods=3).count()
455+
expected = constructor([np.nan, np.nan, 3.0, 3.0, 3.0])
456+
tm.assert_equal(result, expected)
457+
458+
459+
@pytest.mark.parametrize("constructor", [Series, DataFrame])
460+
def test_rolling_count_default_min_periods_with_null_values(constructor):
461+
# GH 26996
462+
values = [1, 2, 3, np.nan, 4, 5, 6]
463+
expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0]
464+
465+
result = constructor(values).rolling(3).count()
466+
expected = constructor(expected_counts)
467+
tm.assert_equal(result, expected)

0 commit comments

Comments
 (0)