Skip to content

Commit 75714de

Browse files
sinhrks authored and jreback committed
BUG: remove_unused_categories dtype coerces to int64
Author: sinhrks <[email protected]>

Closes #13261 from sinhrks/categorical_remove_dtype and squashes the following commits:

b5cbe2c [sinhrks] BUG: remove_unused_categories dtype coerces to int64
1 parent 8662cb9 commit 75714de

File tree

3 files changed

+15
-10
lines changed

3 files changed

+15
-10
lines changed

doc/source/whatsnew/v0.18.2.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,3 +248,5 @@ Bug Fixes
248248

249249

250250
- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
251+
252+
- Bug in ``Categorical.remove_unused_categories()`` where the ``.codes`` dtype was changed to platform int (:issue:`13261`)

pandas/core/categorical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -898,8 +898,8 @@ def remove_unused_categories(self, inplace=False):
898898
if idx.size != 0 and idx[0] == -1: # na sentinel
899899
idx, inv = idx[1:], inv - 1
900900

901-
cat._codes = inv
902901
cat._categories = cat.categories.take(idx)
902+
cat._codes = _coerce_indexer_dtype(inv, self._categories)
903903

904904
if not inplace:
905905
return cat

pandas/tests/test_categorical.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,14 +1022,14 @@ def f():
10221022
def test_remove_unused_categories(self):
10231023
c = Categorical(["a", "b", "c", "d", "a"],
10241024
categories=["a", "b", "c", "d", "e"])
1025-
exp_categories_all = np.array(["a", "b", "c", "d", "e"])
1026-
exp_categories_dropped = np.array(["a", "b", "c", "d"])
1025+
exp_categories_all = Index(["a", "b", "c", "d", "e"])
1026+
exp_categories_dropped = Index(["a", "b", "c", "d"])
10271027

10281028
self.assert_numpy_array_equal(c.categories, exp_categories_all)
10291029

10301030
res = c.remove_unused_categories()
1031-
self.assert_numpy_array_equal(res.categories, exp_categories_dropped)
1032-
self.assert_numpy_array_equal(c.categories, exp_categories_all)
1031+
self.assert_index_equal(res.categories, exp_categories_dropped)
1032+
self.assert_index_equal(c.categories, exp_categories_all)
10331033

10341034
res = c.remove_unused_categories(inplace=True)
10351035
self.assert_numpy_array_equal(c.categories, exp_categories_dropped)
@@ -1039,15 +1039,18 @@ def test_remove_unused_categories(self):
10391039
c = Categorical(["a", "b", "c", np.nan],
10401040
categories=["a", "b", "c", "d", "e"])
10411041
res = c.remove_unused_categories()
1042-
self.assert_numpy_array_equal(res.categories,
1043-
np.array(["a", "b", "c"]))
1044-
self.assert_numpy_array_equal(c.categories, exp_categories_all)
1042+
self.assert_index_equal(res.categories,
1043+
Index(np.array(["a", "b", "c"])))
1044+
exp_codes = np.array([0, 1, 2, -1], dtype=np.int8)
1045+
self.assert_numpy_array_equal(res.codes, exp_codes)
1046+
self.assert_index_equal(c.categories, exp_categories_all)
10451047

10461048
val = ['F', np.nan, 'D', 'B', 'D', 'F', np.nan]
10471049
cat = pd.Categorical(values=val, categories=list('ABCDEFG'))
10481050
out = cat.remove_unused_categories()
1049-
self.assert_numpy_array_equal(out.categories, ['B', 'D', 'F'])
1050-
self.assert_numpy_array_equal(out.codes, [2, -1, 1, 0, 1, 2, -1])
1051+
self.assert_index_equal(out.categories, Index(['B', 'D', 'F']))
1052+
exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8)
1053+
self.assert_numpy_array_equal(out.codes, exp_codes)
10511054
self.assertEqual(out.get_values().tolist(), val)
10521055

10531056
alpha = list('abcdefghijklmnopqrstuvwxyz')

0 commit comments

Comments
 (0)