Closed
Description
Add an argument, min_i (minimum intersection), to nancorr in pandas.src.moments.pyx.
This will set an element to NaN if its number of jointly valid observations (nobs) is < min_i; min_i should default to 1 to replicate the current behavior.
Need to also propagate the argument to: core.frame.corr/cov
Possibly add it to: core.series.corr/cov
@cython.boundscheck(False)
@cython.wraparound(False)
def nancorr(ndarray[float64_t, ndim=2] mat, cov=False, min_i=1):
    """
    Pairwise correlation (or covariance) between the columns of ``mat``.

    For every pair of columns only the rows in which both values are
    finite contribute to the statistic.

    Parameters
    ----------
    mat : ndarray[float64, ndim=2]
        Input of shape (N, K); each column is treated as one variable.
    cov : bool, default False
        If true, return the covariance (cross-product of deviations divided
        by ``nobs - 1``); otherwise return the correlation (cross-product
        divided by ``sqrt(sumxx * sumyy)``).
    min_i : int, default 1
        Minimum number of jointly finite rows ("minimum intersection") a
        column pair must have; pairs with fewer observations are set to NaN.

    Returns
    -------
    ndarray[float64, ndim=2]
        Symmetric (K, K) matrix. An entry is NaN when the pair has no
        jointly finite rows, has fewer than ``min_i`` of them, or when the
        divisor is zero.
    """
    cdef:
        Py_ssize_t i, xi, yi, N, K
        ndarray[float64_t, ndim=2] result
        ndarray[uint8_t, ndim=2] mask
        int64_t nobs = 0
        float64_t vx, vy, sumx, sumy, sumxx, sumyy, meanx, meany, divisor
        float64_t nan_value
        bint want_cov = cov

    N, K = (<object> mat).shape

    # Look the NaN constant up once instead of on every assignment;
    # ``np.nan`` is used because the ``np.NaN`` alias no longer exists in
    # NumPy 2.0.
    nan_value = np.nan

    result = np.empty((K, K), dtype=np.float64)
    mask = np.isfinite(mat).view(np.uint8)

    for xi in range(K):
        # Only the lower triangle is computed; each value is mirrored.
        for yi in range(xi + 1):
            nobs = 0
            sumx = sumy = sumxx = sumyy = 0

            # First pass: count the jointly finite rows and sum them.
            for i in range(N):
                if mask[i, xi] and mask[i, yi]:
                    vx = mat[i, xi]
                    vy = mat[i, yi]
                    nobs += 1
                    sumx += vx
                    sumy += vy

            # ``nobs == 0`` is checked explicitly so that a caller passing
            # ``min_i <= 0`` can never reach the division by ``nobs`` below.
            if nobs == 0 or nobs < min_i:
                result[xi, yi] = result[yi, xi] = nan_value
            else:
                meanx = sumx / nobs
                meany = sumy / nobs

                # Second pass: ``sumx`` is reused as the covariance
                # numerator (sum of cross-products of deviations).
                sumx = 0
                for i in range(N):
                    if mask[i, xi] and mask[i, yi]:
                        vx = mat[i, xi] - meanx
                        vy = mat[i, yi] - meany
                        sumx += vx * vy
                        sumxx += vx * vx
                        sumyy += vy * vy

                if want_cov:
                    divisor = nobs - 1.0
                else:
                    divisor = sqrt(sumxx * sumyy)

                if divisor != 0:
                    result[xi, yi] = result[yi, xi] = sumx / divisor
                else:
                    result[xi, yi] = result[yi, xi] = nan_value

    return result