Skip to content

WIP: Experimental changes in rolling_var related to #7900 #7916

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 36 additions & 43 deletions pandas/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1160,75 +1160,68 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
"""
Numerically stable implementation using Welford's method.
"""
cdef double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta
cdef Py_ssize_t i
cdef double val, prev, mean_x = 0, ssqdm_x = 0, delta, rep = NaN
cdef Py_ssize_t nobs = 0, nrep = 0, i
cdef Py_ssize_t N = len(input)

cdef ndarray[double_t] output = np.empty(N, dtype=float)

minp = _check_minp(win, minp, N)

# Check for windows larger than array, addresses #7297
win = min(win, N)

# Over the first window, observations can only be added, never removed
for i from 0 <= i < win:
for i from 0 <= i < N:
val = input[i]
prev = NaN if i < win else input[i - win]

# First, count the number of observations and consecutive repeats
if prev == prev:
# prev is not NaN, removing an observation...
if nobs == nrep:
# ...and removing a repeat
nrep -= 1
if nrep == 0:
rep = NaN
nobs -= 1

# Not NaN
if val == val:
nobs += 1
delta = (val - mean_x)
mean_x += delta / nobs
ssqdm_x += delta * (val - mean_x)

if nobs >= minp:
#pathological case
if nobs == 1:
val = 0
# next is not NaN, adding an observation...
if val == prev:
# ...and adding a repeat
nrep += 1
else:
val = ssqdm_x / (nobs - ddof)
if val < 0:
val = 0
else:
val = NaN

output[i] = val

# After the first window, observations can both be added and removed
for i from win <= i < N:
val = input[i]
prev = input[i - win]
# ...and resetting repeats
nrep = 1
rep = val
nobs += 1

if val == val:
# Then, compute the new mean and sum of squared differences
if nobs == nrep:
# All non-NaN values in window are identical...
ssqdm_x = 0
mean_x = rep if nobs > 0 else 0
elif val == val:
# Adding one observation...
if prev == prev:
# Adding one observation and removing another one
# ...and removing another
delta = val - prev
prev -= mean_x
mean_x += delta / nobs
val -= mean_x
ssqdm_x += (val + prev) * delta
else:
# Adding one observation and not removing any
nobs += 1
# ...and not removing any
delta = (val - mean_x)
mean_x += delta / nobs
ssqdm_x += delta * (val - mean_x)
elif prev == prev:
# Adding no new observation, but removing one
nobs -= 1
if nobs:
delta = (prev - mean_x)
mean_x -= delta / nobs
ssqdm_x -= delta * (prev - mean_x)
else:
mean_x = 0
ssqdm_x = 0
delta = (prev - mean_x)
mean_x -= delta / nobs
ssqdm_x -= delta * (prev - mean_x)
# Variance is unchanged if no observation is added or removed

# Finally, compute and write the rolling variance to the output array
if nobs >= minp:
#pathological case
if nobs == 1:
if nobs <= ddof:
val = 0
else:
val = ssqdm_x / (nobs - ddof)
Expand Down