Description
Code Sample, a copy-pastable example if possible
import pandas as pd
tbl = pd.DataFrame({"col_num": [1, 1, 2, 3]})
tbl["col_cat"] = tbl["col_num"].astype("category")
# The following line works in pandas 0.25.3 but raises NotImplementedError in pandas 1.0.0
df = tbl.groupby("col_num").agg({"col_cat": "first"})
Problem description
Calling agg() with "first" on a categorical column after a groupby works without any warnings in pandas 0.25.3, but raises NotImplementedError ("category dtype not supported") in pandas 1.0.0:
~\AppData\Local\Continuum\miniconda3\envs\dev37\lib\site-packages\pandas\core\groupby\generic.py in aggregate(self, func, *args, **kwargs)
938 func = _maybe_mangle_lambdas(func)
939
--> 940 result, how = self._aggregate(func, *args, **kwargs)
941 if how is None:
942 return result
~\AppData\Local\Continuum\miniconda3\envs\dev37\lib\site-packages\pandas\core\base.py in _aggregate(self, arg, *args, **kwargs)
426
427 try:
--> 428 result = _agg(arg, _agg_1dim)
429 except SpecificationError:
430
~\AppData\Local\Continuum\miniconda3\envs\dev37\lib\site-packages\pandas\core\base.py in _agg(arg, func)
393 result = {}
394 for fname, agg_how in arg.items():
--> 395 result[fname] = func(fname, agg_how)
396 return result
397
~\AppData\Local\Continuum\miniconda3\envs\dev37\lib\site-packages\pandas\core\base.py in _agg_1dim(name, how, subset)
377 "nested dictionary is ambiguous in aggregation"
378 )
--> 379 return colg.aggregate(how)
380
381 def _agg_2dim(name, how):
~\AppData\Local\Continuum\miniconda3\envs\dev37\lib\site-packages\pandas\core\groupby\generic.py in aggregate(self, func, *args, **kwargs)
245
246 if isinstance(func, str):
--> 247 return getattr(self, func)(*args, **kwargs)
248
249 elif isinstance(func, abc.Iterable):
~\AppData\Local\Continuum\miniconda3\envs\dev37\lib\site-packages\pandas\core\groupby\groupby.py in f(self, **kwargs)
1376 # try a cython aggregation if we can
1377 try:
-> 1378 return self._cython_agg_general(alias, alt=npfunc, **kwargs)
1379 except DataError:
1380 pass
~\AppData\Local\Continuum\miniconda3\envs\dev37\lib\site-packages\pandas\core\groupby\groupby.py in _cython_agg_general(self, how, alt, numeric_only, min_count)
888
889 result, agg_names = self.grouper.aggregate(
--> 890 obj._values, how, min_count=min_count
891 )
892
~\AppData\Local\Continuum\miniconda3\envs\dev37\lib\site-packages\pandas\core\groupby\ops.py in aggregate(self, values, how, axis, min_count)
579 ) -> Tuple[np.ndarray, Optional[List[str]]]:
580 return self._cython_operation(
--> 581 "aggregate", values, how, axis, min_count=min_count
582 )
583
~\AppData\Local\Continuum\miniconda3\envs\dev37\lib\site-packages\pandas\core\groupby\ops.py in _cython_operation(self, kind, values, how, axis, min_count, **kwargs)
453 # are not setup for dim transforming
454 if is_categorical_dtype(values) or is_sparse(values):
--> 455 raise NotImplementedError(f"{values.dtype} dtype not supported")
456 elif is_datetime64_any_dtype(values):
457 if how in ["add", "prod", "cumsum", "cumprod"]:
NotImplementedError: category dtype not supported
Output of pd.show_versions()
[paste the output of pd.show_versions()
here below this line]
INSTALLED VERSIONS
commit : None
python : 3.7.6.final.0
python-bits : 64
OS : Windows
OS-release : 10
machine : AMD64
processor : Intel64 Family 6 Model 60 Stepping 3, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : None.None
pandas : 1.0.0
numpy : 1.17.5
pytz : 2019.3
dateutil : 2.8.1
pip : 20.0.2
setuptools : 45.1.0.post20200119
Cython : None
pytest : 5.3.5
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : 1.2.7
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 2.11.0
IPython : 7.11.1
pandas_datareader: None
bs4 : 4.8.2
bottleneck : None
fastparquet : None
gcsfs : None
lxml.etree : None
matplotlib : 3.1.2
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pytables : None
pytest : 5.3.5
pyxlsb : None
s3fs : None
scipy : 1.3.1
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : 1.2.0
xlwt : None
xlsxwriter : 1.2.7
numba : None