Description
This is a very strange error with a long traceback. I found #15680 and #11324 mentioning similar things, but neither seems to cover the behavior here.
Code Sample, a copy-pastable example:
import datetime
import pandas as pd
recs = [{'LIVE': 1,
'ITEM': '001',
'DATE': datetime.date(2019, 10, 1)},
{'LIVE': 2,
'ITEM': '002',
'DATE': datetime.date(2019, 10, 2)},
{'LIVE': 3,
'ITEM': '003',
'DATE': datetime.date(2019, 10, 1)}]
pd.DataFrame(recs).groupby(['ITEM', 'DATE']).apply(lambda df: df.head(1))
On 0.25.3 this raises a convoluted chain of exceptions: nested KeyErrors that ultimately surface as a ValueError:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/.local/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: Timestamp('2019-10-01 00:00:00')
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _make_concat_multiindex(indexes, keys, levels, names)
631 try:
--> 632 i = level.get_loc(key)
633 except KeyError:
~/.local/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: Timestamp('2019-10-01 00:00:00')
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
~/.local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
724 try:
--> 725 result = self._python_apply_general(f)
726 except Exception:
~/.local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f)
744 return self._wrap_applied_output(
--> 745 keys, values, not_indexed_same=mutated or self.mutated
746 )
~/.local/lib/python3.7/site-packages/pandas/core/groupby/generic.py in _wrap_applied_output(self, keys, values, not_indexed_same)
371 elif isinstance(v, DataFrame):
--> 372 return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
373 elif self.grouper.groupings is not None:
~/.local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _concat_objects(self, keys, values, not_indexed_same)
972 names=group_names,
--> 973 sort=False,
974 )
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, sort, copy)
254 copy=copy,
--> 255 sort=sort,
256 )
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy, sort)
427
--> 428 self.new_axes = self._get_new_axes()
429
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _get_new_axes(self)
521
--> 522 new_axes[self.axis] = self._get_concat_axis()
523 return new_axes
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _get_concat_axis(self)
577 concat_axis = _make_concat_multiindex(
--> 578 indexes, self.keys, self.levels, self.names
579 )
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _make_concat_multiindex(indexes, keys, levels, names)
635 "Key {key!s} not in level {level!s}".format(
--> 636 key=key, level=level
637 )
ValueError: Key 2019-10-01 00:00:00 not in level Index([2019-10-01, 2019-10-02], dtype='object', name='DATE')
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~/.local/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: Timestamp('2019-10-01 00:00:00')
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _make_concat_multiindex(indexes, keys, levels, names)
631 try:
--> 632 i = level.get_loc(key)
633 except KeyError:
~/.local/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: Timestamp('2019-10-01 00:00:00')
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-32-cf7f934e7c7f> in <module>
12 'DATE': datetime.date(2019, 10, 1)}]
13
---> 14 pd.DataFrame(recs).groupby(['ITEM', 'DATE']).apply(lambda df: df.head(1))
~/.local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
735
736 with _group_selection_context(self):
--> 737 return self._python_apply_general(f)
738
739 return result
~/.local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f)
743
744 return self._wrap_applied_output(
--> 745 keys, values, not_indexed_same=mutated or self.mutated
746 )
747
~/.local/lib/python3.7/site-packages/pandas/core/groupby/generic.py in _wrap_applied_output(self, keys, values, not_indexed_same)
370 return DataFrame()
371 elif isinstance(v, DataFrame):
--> 372 return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
373 elif self.grouper.groupings is not None:
374 if len(self.grouper.groupings) > 1:
~/.local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _concat_objects(self, keys, values, not_indexed_same)
971 levels=group_levels,
972 names=group_names,
--> 973 sort=False,
974 )
975 else:
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, sort, copy)
253 verify_integrity=verify_integrity,
254 copy=copy,
--> 255 sort=sort,
256 )
257
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy, sort)
426 self.copy = copy
427
--> 428 self.new_axes = self._get_new_axes()
429
430 def get_result(self):
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _get_new_axes(self)
520 new_axes[i] = ax
521
--> 522 new_axes[self.axis] = self._get_concat_axis()
523 return new_axes
524
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _get_concat_axis(self)
576 else:
577 concat_axis = _make_concat_multiindex(
--> 578 indexes, self.keys, self.levels, self.names
579 )
580
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _make_concat_multiindex(indexes, keys, levels, names)
634 raise ValueError(
635 "Key {key!s} not in level {level!s}".format(
--> 636 key=key, level=level
637 )
638 )
ValueError: Key 2019-10-01 00:00:00 not in level Index([2019-10-01, 2019-10-02], dtype='object', name='DATE')
Oddly, both of these work:
pd.DataFrame(recs).groupby(['ITEM']).apply(lambda df: df.head(1))
pd.DataFrame(recs).groupby(['DATE']).apply(lambda df: df.head(1))
Also, the behavior is the same if I modify `recs` so that only one distinct date is present.
Output of pd.show_versions()
INSTALLED VERSIONS
commit : None
python : 3.7.3.final.0
python-bits : 64
OS : Linux
OS-release : 5.2.1-1.el7.elrepo.x86_64
machine : x86_64
processor :
byteorder : little
LC_ALL : None
LANG : C.UTF-8
LOCALE : en_US.UTF-8
pandas : 0.25.3
numpy : 1.17.4
pytz : 2019.3
dateutil : 2.8.0
pip : 19.2.3
setuptools : 41.0.1
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : 1.0.1
pymysql : None
psycopg2 : 2.8.4 (dt dec pq3 ext lo64)
jinja2 : 2.10.3
IPython : 7.8.0
pandas_datareader: None
bs4 : None
bottleneck : None
fastparquet : None
gcsfs : None
lxml.etree : None
matplotlib : 3.1.1
numexpr : 2.7.0
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pytables : None
s3fs : None
scipy : 1.3.2
sqlalchemy : 1.3.10
tables : 3.6.1
xarray : None
xlrd : None
xlwt : None
xlsxwriter : None