Skip to content

Commit 4973c59

Browse files
sinhrks authored and jreback committed
BUG: CategoricalIndex.get_loc returns array even if it is unique
related to #11558

Author: sinhrks <[email protected]>

Closes #12531 from sinhrks/cat_get_loc and squashes the following commits:

2749b62 [sinhrks] BUG: CategoricalIndex.get_loc returns array even if it is unique
1 parent 03d684e commit 4973c59

File tree

4 files changed

+94
-5
lines changed

4 files changed

+94
-5
lines changed

doc/source/whatsnew/v0.18.1.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,5 @@ Bug Fixes
8989
~~~~~~~~~
9090

9191
- Bug in ``value_counts`` when ``normalize=True`` and ``dropna=True`` where nulls still contributed to the normalized count (:issue:`12558`)
92+
93+
- Bug in ``CategoricalIndex.get_loc`` where it returned a different result from a regular ``Index`` (:issue:`12531`)

pandas/indexes/category.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -287,11 +287,7 @@ def get_loc(self, key, method=None):
287287
codes = self.categories.get_loc(key)
288288
if (codes == -1):
289289
raise KeyError(key)
290-
indexer, _ = self._engine.get_indexer_non_unique(np.array([codes]))
291-
if (indexer == -1).any():
292-
raise KeyError(key)
293-
294-
return indexer
290+
return self._engine.get_loc(codes)
295291

296292
def _can_reindex(self, indexer):
297293
""" always allow reindexing """

pandas/tests/indexes/test_category.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,52 @@ def test_get_indexer(self):
363363
self.assertRaises(NotImplementedError,
364364
lambda: idx2.get_indexer(idx1, method='nearest'))
365365

366+
def test_get_loc(self):
367+
# GH 12531
368+
cidx1 = CategoricalIndex(list('abcde'), categories=list('edabc'))
369+
idx1 = Index(list('abcde'))
370+
self.assertEqual(cidx1.get_loc('a'), idx1.get_loc('a'))
371+
self.assertEqual(cidx1.get_loc('e'), idx1.get_loc('e'))
372+
373+
for i in [cidx1, idx1]:
374+
with tm.assertRaises(KeyError):
375+
i.get_loc('NOT-EXIST')
376+
377+
# non-unique
378+
cidx2 = CategoricalIndex(list('aacded'), categories=list('edabc'))
379+
idx2 = Index(list('aacded'))
380+
381+
# results in bool array
382+
res = cidx2.get_loc('d')
383+
self.assert_numpy_array_equal(res, idx2.get_loc('d'))
384+
self.assert_numpy_array_equal(res, np.array([False, False, False,
385+
True, False, True]))
386+
# unique element results in scalar
387+
res = cidx2.get_loc('e')
388+
self.assertEqual(res, idx2.get_loc('e'))
389+
self.assertEqual(res, 4)
390+
391+
for i in [cidx2, idx2]:
392+
with tm.assertRaises(KeyError):
393+
i.get_loc('NOT-EXIST')
394+
395+
# non-unique, sliceable
396+
cidx3 = CategoricalIndex(list('aabbb'), categories=list('abc'))
397+
idx3 = Index(list('aabbb'))
398+
399+
# results in slice
400+
res = cidx3.get_loc('a')
401+
self.assertEqual(res, idx3.get_loc('a'))
402+
self.assertEqual(res, slice(0, 2, None))
403+
404+
res = cidx3.get_loc('b')
405+
self.assertEqual(res, idx3.get_loc('b'))
406+
self.assertEqual(res, slice(2, 5, None))
407+
408+
for i in [cidx3, idx3]:
409+
with tm.assertRaises(KeyError):
410+
i.get_loc('c')
411+
366412
def test_repr_roundtrip(self):
367413

368414
ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)

pandas/tests/indexing/test_categorical.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,51 @@ def test_loc_listlike_dtypes(self):
180180
'that are in the categories'):
181181
df.loc[['a', 'x']]
182182

183+
def test_ix_categorical_index(self):
184+
# GH 12531
185+
df = pd.DataFrame(np.random.randn(3, 3),
186+
index=list('ABC'), columns=list('XYZ'))
187+
cdf = df.copy()
188+
cdf.index = pd.CategoricalIndex(df.index)
189+
cdf.columns = pd.CategoricalIndex(df.columns)
190+
191+
expect = pd.Series(df.ix['A', :], index=cdf.columns, name='A')
192+
assert_series_equal(cdf.ix['A', :], expect)
193+
194+
expect = pd.Series(df.ix[:, 'X'], index=cdf.index, name='X')
195+
assert_series_equal(cdf.ix[:, 'X'], expect)
196+
197+
expect = pd.DataFrame(df.ix[['A', 'B'], :], columns=cdf.columns,
198+
index=pd.CategoricalIndex(list('AB')))
199+
assert_frame_equal(cdf.ix[['A', 'B'], :], expect)
200+
201+
expect = pd.DataFrame(df.ix[:, ['X', 'Y']], index=cdf.index,
202+
columns=pd.CategoricalIndex(list('XY')))
203+
assert_frame_equal(cdf.ix[:, ['X', 'Y']], expect)
204+
205+
# non-unique
206+
df = pd.DataFrame(np.random.randn(3, 3),
207+
index=list('ABA'), columns=list('XYX'))
208+
cdf = df.copy()
209+
cdf.index = pd.CategoricalIndex(df.index)
210+
cdf.columns = pd.CategoricalIndex(df.columns)
211+
212+
expect = pd.DataFrame(df.ix['A', :], columns=cdf.columns,
213+
index=pd.CategoricalIndex(list('AA')))
214+
assert_frame_equal(cdf.ix['A', :], expect)
215+
216+
expect = pd.DataFrame(df.ix[:, 'X'], index=cdf.index,
217+
columns=pd.CategoricalIndex(list('XX')))
218+
assert_frame_equal(cdf.ix[:, 'X'], expect)
219+
220+
expect = pd.DataFrame(df.ix[['A', 'B'], :], columns=cdf.columns,
221+
index=pd.CategoricalIndex(list('AAB')))
222+
assert_frame_equal(cdf.ix[['A', 'B'], :], expect)
223+
224+
expect = pd.DataFrame(df.ix[:, ['X', 'Y']], index=cdf.index,
225+
columns=pd.CategoricalIndex(list('XXY')))
226+
assert_frame_equal(cdf.ix[:, ['X', 'Y']], expect)
227+
183228
def test_read_only_source(self):
184229
# GH 10043
185230
rw_array = np.eye(10)

0 commit comments

Comments
 (0)