Skip to content

Commit 4eb9769

Browse files
almaleksia authored and jreback committed
remove NaN in categories checking (#20372)
1 parent 4dc05b5 commit 4eb9769

File tree

1 file changed

+6
-26
lines changed

1 file changed

+6
-26
lines changed

pandas/core/arrays/categorical.py

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,7 +1258,7 @@ def isna(self):
12581258
"""
12591259
Detect missing values
12601260
1261-
Both missing values (-1 in .codes) and NA as a category are detected.
1261+
Missing values (-1 in .codes) are detected.
12621262
12631263
Returns
12641264
-------
@@ -1273,13 +1273,6 @@ def isna(self):
12731273
"""
12741274

12751275
ret = self._codes == -1
1276-
1277-
# String/object and float categories can hold np.nan
1278-
if self.categories.dtype.kind in ['S', 'O', 'f']:
1279-
if np.nan in self.categories:
1280-
nan_pos = np.where(isna(self.categories))[0]
1281-
# we only have one NA in categories
1282-
ret = np.logical_or(ret, self._codes == nan_pos)
12831276
return ret
12841277
isnull = isna
12851278

@@ -1315,16 +1308,14 @@ def dropna(self):
13151308
"""
13161309
Return the Categorical without null values.
13171310
1318-
Both missing values (-1 in .codes) and NA as a category are detected.
1319-
NA is removed from the categories if present.
1311+
Missing values (-1 in .codes) are detected.
13201312
13211313
Returns
13221314
-------
13231315
valid : Categorical
13241316
"""
13251317
result = self[self.notna()]
1326-
if isna(result.categories).any():
1327-
result = result.remove_categories([np.nan])
1318+
13281319
return result
13291320

13301321
def value_counts(self, dropna=True):
@@ -1336,7 +1327,7 @@ def value_counts(self, dropna=True):
13361327
Parameters
13371328
----------
13381329
dropna : boolean, default True
1339-
Don't include counts of NaN, even if NaN is a category.
1330+
Don't include counts of NaN.
13401331
13411332
Returns
13421333
-------
@@ -1348,11 +1339,9 @@ def value_counts(self, dropna=True):
13481339
13491340
"""
13501341
from numpy import bincount
1351-
from pandas import isna, Series, CategoricalIndex
1342+
from pandas import Series, CategoricalIndex
13521343

1353-
obj = (self.remove_categories([np.nan]) if dropna and
1354-
isna(self.categories).any() else self)
1355-
code, cat = obj._codes, obj.categories
1344+
code, cat = self._codes, self.categories
13561345
ncat, mask = len(cat), 0 <= code
13571346
ix, clean = np.arange(ncat), mask.all()
13581347

@@ -1880,15 +1869,6 @@ def __setitem__(self, key, value):
18801869
key = np.asarray(key)
18811870

18821871
lindexer = self.categories.get_indexer(rvalue)
1883-
1884-
# FIXME: the following can be removed after GH7820 is fixed:
1885-
# https://github.com/pandas-dev/pandas/issues/7820
1886-
# float categories do currently return -1 for np.nan, even if np.nan is
1887-
# included in the index -> "repair" this here
1888-
if isna(rvalue).any() and isna(self.categories).any():
1889-
nan_pos = np.where(isna(self.categories))[0]
1890-
lindexer[lindexer == -1] = nan_pos
1891-
18921872
lindexer = self._maybe_coerce_indexer(lindexer)
18931873
self._codes[key] = lindexer
18941874

0 commit comments

Comments
 (0)