Skip to content

BUG? Possible issue with multi indexing column names of different types #19517

Closed
@AntoineGautier

Description

@AntoineGautier

See #19517 (comment)

In [51]: idx1 = pd.MultiIndex.from_product([['a', 'b'], ['A', 'B']])

In [52]: s1 = pd.Series(index=idx1)

In [53]: s1.loc['a']
Out[53]:
A   NaN
B   NaN
dtype: float64

In [54]: idx2 = pd.MultiIndex.from_product([['a', 'b'], ['A', 'B']], names=[1, 0])

In [55]: s2 = pd.Series(index=idx2)

In [56]: s2.loc['a']
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/sandbox/pandas/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2672             try:
-> 2673                 return self._engine.get_loc(key)
   2674             except KeyError:

~/sandbox/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
    106
--> 107     cpdef get_loc(self, object val):
    108         if is_definitely_invalid_key(val):

~/sandbox/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
    130         try:
--> 131             return self.mapping.get_item(val)
    132         except (TypeError, ValueError):

~/sandbox/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
   1602
-> 1603     cpdef get_item(self, object val):
   1604         cdef khiter_t k

~/sandbox/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
   1609         else:
-> 1610             raise KeyError(val)
   1611

KeyError: 'a'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-56-761f4dad11a7> in <module>
----> 1 s2.loc['a']

~/sandbox/pandas/pandas/core/indexing.py in __getitem__(self, key)
   1502
   1503             maybe_callable = com.apply_if_callable(key, self.obj)
-> 1504             return self._getitem_axis(maybe_callable, axis=axis)
   1505
   1506     def _is_scalar_access(self, key):

~/sandbox/pandas/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1915         # fall thru to straight lookup
   1916         self._validate_key(key, axis)
-> 1917         return self._get_label(key, axis=axis)
   1918
   1919

~/sandbox/pandas/pandas/core/indexing.py in _get_label(self, label, axis)
    133             # but will fail when the index is not present
    134             # see GH5667
--> 135             return self.obj._xs(label, axis=axis)
    136         elif isinstance(label, tuple) and isinstance(label[axis], slice):
    137             raise IndexingError('no slices here, handle elsewhere')

~/sandbox/pandas/pandas/core/generic.py in xs(self, key, axis, level, drop_level)
   3643         if isinstance(index, MultiIndex):
   3644             loc, new_index = self.index.get_loc_level(key,
-> 3645                                                       drop_level=drop_level)
   3646         else:
   3647             loc = self.index.get_loc(key)

~/sandbox/pandas/pandas/core/indexes/multi.py in get_loc_level(self, key, level, drop_level)
   2587                 return indexer, maybe_droplevels(indexer, ilevels, drop_level)
   2588         else:
-> 2589             indexer = self._get_level_indexer(key, level=level)
   2590             return indexer, maybe_droplevels(indexer, [level], drop_level)
   2591

~/sandbox/pandas/pandas/core/indexes/multi.py in _get_level_indexer(self, key, level, indexer)
   2668         else:
   2669
-> 2670             code = level_index.get_loc(key)
   2671
   2672             if level > 0 or self.lexsort_depth == 0:

~/sandbox/pandas/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2673                 return self._engine.get_loc(key)
   2674             except KeyError:
-> 2675                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2676         indexer = self.get_indexer([key], method=method, tolerance=tolerance)
   2677         if indexer.ndim > 1 or indexer.size > 1:

~/sandbox/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
    105         arr[loc] = value
    106
--> 107     cpdef get_loc(self, object val):
    108         if is_definitely_invalid_key(val):
    109             raise TypeError("'{val}' is an invalid key".format(val=val))

~/sandbox/pandas/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
    129
    130         try:
--> 131             return self.mapping.get_item(val)
    132         except (TypeError, ValueError):
    133             raise KeyError(val)

~/sandbox/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
   1601                                        sizeof(uint32_t))  # flags
   1602
-> 1603     cpdef get_item(self, object val):
   1604         cdef khiter_t k
   1605

~/sandbox/pandas/pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
   1608             return self.table.vals[k]
   1609         else:
-> 1610             raise KeyError(val)
   1611
   1612     cpdef set_item(self, object key, Py_ssize_t val):

KeyError: 'a'

Code Sample, a copy-pastable example if possible

df = pd.read_csv('https://raw.githubusercontent.com/AntoineGautier/Data/master/new.txt')
df.rename({'1': 1, '3': 3, '0': 0}, axis=1, inplace=True)
df.set_index(list(df.columns[:3]), inplace=True)
df.sort_index(inplace=True)
for idx in df.index:
    tmp = df.loc[idx, :]

Problem description

The code above raises the exception: KeyError: (1949L, '37,5 cl ', 'Ch\xc3\xa2teau LAFITE ROTHSCHILD ').
No exception is raised if all column names are strings.

Remark: Column names of mixed types typically occur when reading from an external file with header=None and then merging.

Output of pd.show_versions()

INSTALLED VERSIONS

commit: None
python: 2.7.13.final.0
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 60 Stepping 3, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None

pandas: 0.22.0
pytest: 3.0.7
pip: 9.0.1
setuptools: 38.4.0
Cython: 0.25.2
numpy: 1.14.0
scipy: 0.19.0
pyarrow: None
xarray: None
IPython: 5.5.0
sphinx: 1.5.6
patsy: 0.4.1
dateutil: 2.6.1
pytz: 2017.3
blosc: None
bottleneck: 1.2.1
tables: 3.2.2
numexpr: 2.6.2
feather: None
matplotlib: 2.0.2
openpyxl: 2.4.7
xlrd: 1.0.0
xlwt: 1.2.0
xlsxwriter: 0.9.6
lxml: 4.1.1
bs4: 4.6.0
html5lib: 0.999
sqlalchemy: 1.1.9
pymysql: None
psycopg2: None
jinja2: 2.9.6
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: 0.5.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    Indexing (Related to indexing on series/frames, not to indexes themselves); Needs Tests (Unit test(s) needed to prevent regressions); good first issue

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions