Description
Code Sample, a copy-pastable example if possible
In [2]: df = pd.DataFrame(-1, index=range(3), columns=list('abcd'))
In [3]: df.loc[:, list('abe')]
/home/nobackup/repo/pandas/pandas/core/indexing.py:1504: FutureWarning:
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.
See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
return self._getitem_tuple(key)
Out[3]:
a b e
0 -1 -1 NaN
1 -1 -1 NaN
2 -1 -1 NaN
In [4]: df.columns = df.columns.astype('category')
In [5]: df.loc[:, list('abe')]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-5-f046079c7cb9> in <module>()
----> 1 df.loc[:, list('abe')]
/home/nobackup/repo/pandas/pandas/core/indexing.py in __getitem__(self, key)
1502 except (KeyError, IndexError):
1503 pass
-> 1504 return self._getitem_tuple(key)
1505 else:
1506 # we by definition only have the 0th axis
/home/nobackup/repo/pandas/pandas/core/indexing.py in _getitem_tuple(self, tup)
894 continue
895
--> 896 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
897
898 return retval
/home/nobackup/repo/pandas/pandas/core/indexing.py in _getitem_axis(self, key, axis)
1910 raise ValueError('Cannot index with multidimensional key')
1911
-> 1912 return self._getitem_iterable(key, axis=axis)
1913
1914 # nested tuple slicing
/home/nobackup/repo/pandas/pandas/core/indexing.py in _getitem_iterable(self, key, axis)
1211 # A collection of keys
1212 keyarr, indexer = self._get_listlike_indexer(key, axis,
-> 1213 raise_missing=False)
1214 return self.obj._reindex_with_indexers({axis: [keyarr, indexer]},
1215 copy=True, allow_dups=True)
/home/nobackup/repo/pandas/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
1148 # if it cannot handle:
1149 indexer, keyarr = ax._convert_listlike_indexer(key,
-> 1150 kind=self.name)
1151 # We only act on all found values:
1152 if indexer is not None and (indexer != -1).all():
/home/nobackup/repo/pandas/pandas/core/indexes/base.py in _convert_listlike_indexer(self, keyarr, kind)
1670 keyarr = self._convert_arr_indexer(keyarr)
1671
-> 1672 indexer = self._convert_list_indexer(keyarr, kind=kind)
1673 return indexer, keyarr
1674
/home/nobackup/repo/pandas/pandas/core/indexes/category.py in _convert_list_indexer(self, keyarr, kind)
622 if (indexer == -1).any():
623 raise KeyError(
--> 624 "a list-indexer must only "
625 "include values that are "
626 "in the categories")
KeyError: 'a list-indexer must only include values that are in the categories'
Problem description
I don't have time to bisect now, but I guess this is due to #21569 (or alternatively #21594).
However, I don't think the fix should be (yet another special case) in `core/indexing.py`, but rather in the `Categorical` code, which once more is artificially limiting the ability to interface with missing categories, something we should avoid, at least in the internals.
(More generally, it is annoying that one specific kind of index behaves differently from all other kinds in certain methods, both public and private.)
Expected Output
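The same result as `Out[3]` above, i.e. the missing label 'e' produces a column of NaN (with the FutureWarning) instead of raising KeyError.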
(Unless we decide that our deprecation cycle for missing keys in `.loc` immediately affects `Categorical` axes, and document this...)
Output of pd.show_versions()
INSTALLED VERSIONS
commit: dc45fba
python: 3.5.3.final.0
python-bits: 64
OS: Linux
OS-release: 4.9.0-6-amd64
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: it_IT.UTF-8
LOCALE: it_IT.UTF-8
pandas: 0.24.0.dev0+193.gdc45fbafe
pytest: 3.5.0
pip: 9.0.1
setuptools: 39.2.0
Cython: 0.25.2
numpy: 1.14.3
scipy: 0.19.0
pyarrow: None
xarray: None
IPython: 6.2.1
sphinx: 1.5.6
patsy: 0.5.0
dateutil: 2.7.3
pytz: 2018.4
blosc: None
bottleneck: 1.2.0dev
tables: 3.3.0
numexpr: 2.6.1
feather: 0.3.1
matplotlib: 2.2.2.post1153+gff6786446
openpyxl: 2.3.0
xlrd: 1.0.0
xlwt: 1.3.0
xlsxwriter: 0.9.6
lxml: 4.1.1
bs4: 4.5.3
html5lib: 0.999999999
sqlalchemy: 1.0.15
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: 0.2.1
gcsfs: None