-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Closed
Labels
Algos (non-arithmetic algos: value_counts, factorize, sorting, isin, clip, shift, diff); Clean; Missing-data (np.nan, pd.NaT, pd.NA, dropna, isnull, interpolate); Refactor (internal refactoring of code)
Description
See comment #14536 (comment)
Move the `_nanpercentile` functionality used in `quantile` in `internals.py`
(`pandas/pandas/core/internals.py`, lines 1319 to 1393 in 52f31d4:
def _nanpercentile1D(values, mask, q, **kw):
    """Percentile(s) ``q`` of ``values`` after dropping the masked entries.

    NOTE(review): ``self`` is not a parameter; it is presumably captured from
    the enclosing method, which lies outside this excerpt.

    Parameters
    ----------
    values : array
        Data to reduce; it is indexed with ``~mask``.
    mask : array
        Truthy where an entry must be excluded (the visible caller builds it
        with ``isnull``).
    q : scalar or sequence
        Percentile(s) handed to ``np.percentile``; the visible callers pass
        quantiles already multiplied by 100.
    **kw
        Forwarded unchanged to ``np.percentile``.

    Returns
    -------
    ``self._na_value`` for a scalar ``q``, or an array holding ``len(q)``
    copies of it, when no entries survive the mask; otherwise the result of
    ``np.percentile(values, q, **kw)``.
    """
    # Keep only the entries that are not masked out.
    values = values[~mask]
    if len(values) == 0:
        # Nothing left to reduce: answer with the NA value, shaped like q.
        if is_scalar(q):
            return self._na_value
        else:
            return np.array([self._na_value] * len(q),
                            dtype=values.dtype)
    return np.percentile(values, q, **kw)
def _nanpercentile(values, q, axis, **kw):
    """Percentile(s) ``q`` of ``values`` along ``axis``, skipping null entries.

    The null mask is computed from ``self.values`` rather than from the
    ``values`` argument, and ``self.ndim`` chooses between the 1-D and the
    row-by-row path.
    NOTE(review): ``self`` is presumably captured from the enclosing method,
    which lies outside this excerpt; confirm that masking ``self.values``
    (not ``values``) is intended before refactoring.

    Parameters
    ----------
    values : array
        Data to reduce.
    q : scalar or sequence
        Percentile(s); the visible callers pass quantiles multiplied by 100.
    axis : int
        Axis handed to ``np.percentile`` on the no-null path; on the null
        path ``axis == 0`` triggers a transpose before the row-wise loop.
    **kw
        Forwarded unchanged to ``np.percentile``.

    Returns
    -------
    The ``np.percentile`` result when there are no nulls, otherwise the
    per-slice results of ``_nanpercentile1D`` (stacked and transposed for the
    non-1-D case).
    """
    mask = isnull(self.values)
    # Only take the slow, mask-aware path when at least one null is present.
    if not is_scalar(mask) and mask.any():
        if self.ndim == 1:
            return _nanpercentile1D(values, mask, q, **kw)
        else:
            # for nonconsolidatable blocks mask is 1D, but values 2D
            if mask.ndim < values.ndim:
                mask = mask.reshape(values.shape)
            # Iteration below walks the first axis, so for axis == 0 transpose
            # first; each iterated slice then runs along the original axis 0.
            if axis == 0:
                values = values.T
                mask = mask.T
            # Reduce each slice on its own, with its own mask.
            result = [_nanpercentile1D(val, m, q, **kw) for (val, m)
                      in zip(list(values), list(mask))]
            # Stack the per-slice results, then transpose the stacked array.
            result = np.array(result, dtype=values.dtype, copy=False).T
            return result
    else:
        return np.percentile(values, q, axis=axis, **kw)
from pandas import Float64Index | |
is_empty = values.shape[axis] == 0 | |
if is_list_like(qs): | |
ax = Float64Index(qs) | |
if is_empty: | |
if self.ndim == 1: | |
result = self._na_value | |
else: | |
# create the array of na_values | |
# 2d len(values) * len(qs) | |
result = np.repeat(np.array([self._na_value] * len(qs)), | |
len(values)).reshape(len(values), | |
len(qs)) | |
else: | |
try: | |
result = _nanpercentile(values, np.array(qs) * 100, | |
axis=axis, **kw) | |
except ValueError: | |
# older numpies don't handle an array for q | |
result = [_nanpercentile(values, q * 100, | |
axis=axis, **kw) for q in qs] | |
result = np.array(result, copy=False) | |
if self.ndim > 1: | |
result = result.T | |
else: | |
if self.ndim == 1: | |
ax = Float64Index([qs]) | |
else: | |
ax = mgr.axes[0] | |
if is_empty: | |
if self.ndim == 1: | |
result = self._na_value | |
else: | |
result = np.array([self._na_value] * len(self)) | |
else: | |
result = _nanpercentile(values, qs * 100, axis=axis, **kw) |
) to `nanops.py`.
Metadata
Assignees
Labels
Algos (non-arithmetic algos: value_counts, factorize, sorting, isin, clip, shift, diff); Clean; Missing-data (np.nan, pd.NaT, pd.NA, dropna, isnull, interpolate); Refactor (internal refactoring of code)