Description
Code Sample
import numpy as np
import pandas as pd
# Sample data: column "A" ends with a NaN, column "B" has no missing values.
_arr = np.array([1, 2, 3, 4, 5, np.nan])
_arr2 = np.array([1, 2, 3, 4, 5, 7])
df = pd.DataFrame(dict(A=_arr, B=_arr2))
def perc_fun(q):
    """Build a NaN-ignoring percentile function for a fixed percentile.

    Parameters
    ----------
    q : float
        Percentile to compute; passed unchanged to ``np.nanpercentile``.

    Returns
    -------
    callable
        A one-argument function ``f(arr)`` that returns
        ``np.nanpercentile(arr, q)``. Its ``__name__`` is set to
        ``"p-<q>"`` (for example ``"p-10"``).
    """

    def f(arr):
        return np.nanpercentile(arr, q)

    # Every closure would otherwise be named "f"; give each one a name that
    # encodes its percentile (presumably so results are labelled distinctly
    # when several of these are passed to ``.agg`` -- confirm against pandas).
    f.__name__ = f"p-{q}"
    return f
Calling the function directly, or passing it on its own to `agg`, works as expected:
perc_fun(10)(_arr)
>>> 1.4
df.agg(perc_fun(10))
>>> A 1.4
>>> B 1.5
>>> dtype: float64
But combining it with another aggregation in a list raises:
df.agg(["mean", perc_fun(10)])
TypeError Traceback (most recent call last)
~/.local/lib/python3.7/site-packages/pandas/core/base.py in _aggregate_multiple_funcs(self, arg, _axis)
553 try:
--> 554 return concat(results, keys=keys, axis=1, sort=False)
555 except TypeError:
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
280 copy=copy,
--> 281 sort=sort,
282 )
~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)
356 )
--> 357 raise TypeError(msg)
358
TypeError: cannot concatenate object of type '<class 'numpy.float64'>'; only Series and DataFrame objs are valid
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
in
----> 1 df.agg(["mean", perc_fun(10)])
~/.local/lib/python3.7/site-packages/pandas/core/frame.py in aggregate(self, func, axis, *args, **kwargs)
6704 result = None
6705 try:
-> 6706 result, how = self._aggregate(func, axis=axis, *args, **kwargs)
6707 except TypeError:
6708 pass
~/.local/lib/python3.7/site-packages/pandas/core/frame.py in _aggregate(self, arg, axis, *args, **kwargs)
6718 result = result.T if result is not None else result
6719 return result, how
-> 6720 return super()._aggregate(arg, *args, **kwargs)
6721
6722 agg = aggregate
~/.local/lib/python3.7/site-packages/pandas/core/base.py in _aggregate(self, arg, *args, **kwargs)
484 elif is_list_like(arg):
485 # we require a list, but not an 'str'
--> 486 return self._aggregate_multiple_funcs(arg, _axis=_axis), None
487 else:
488 result = None
~/.local/lib/python3.7/site-packages/pandas/core/base.py in _aggregate_multiple_funcs(self, arg, _axis)
530 colg = self._gotitem(col, ndim=1, subset=obj.iloc[:, index])
531 try:
--> 532 new_res = colg.aggregate(arg)
533 except (TypeError, DataError):
534 pass
~/.local/lib/python3.7/site-packages/pandas/core/series.py in aggregate(self, func, axis, *args, **kwargs)
3686 # Validate the axis parameter
3687 self._get_axis_number(axis)
-> 3688 result, how = self._aggregate(func, *args, **kwargs)
3689 if result is None:
3690
~/.local/lib/python3.7/site-packages/pandas/core/base.py in _aggregate(self, arg, *args, **kwargs)
484 elif is_list_like(arg):
485 # we require a list, but not an 'str'
--> 486 return self._aggregate_multiple_funcs(arg, _axis=_axis), None
487 else:
488 result = None
~/.local/lib/python3.7/site-packages/pandas/core/base.py in _aggregate_multiple_funcs(self, arg, _axis)
562 result = Series(results, index=keys, name=self.name)
563 if is_nested_object(result):
--> 564 raise ValueError("cannot combine transform and aggregation operations")
565 return result
566
ValueError: cannot combine transform and aggregation operations
Problem description
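The same code produced a different error in 0.24, so I assume some work has been done here. However, the error message is not very informative: the function is an aggregation, not a transform. I think this call should simply work, producing a `p-10` row with the appropriate values (1.4 for `A` and 1.5 for `B`) alongside the `mean` row.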
I couldn't find any related issues. If no one has experience with this, I will look into the `.agg` code myself.
Output of pd.show_versions()
INSTALLED VERSIONS
commit : None
python : 3.7.4.final.0
python-bits : 64
OS : Linux
OS-release : 5.2.1-1.el7.elrepo.x86_64
machine : x86_64
processor :
byteorder : little
LC_ALL : None
LANG : C.UTF-8
LOCALE : en_US.UTF-8
pandas : 1.0.1
numpy : 1.18.1
pytz : 2018.4
dateutil : 2.7.3
pip : 19.3.1
setuptools : 41.4.0
Cython : None
pytest : 4.1.1
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : 1.0.1
pymysql : None
psycopg2 : 2.8.1 (dt dec pq3 ext lo64)
jinja2 : 2.10
IPython : 7.9.0
pandas_datareader: None
bs4 : None
bottleneck : None
fastparquet : None
gcsfs : None
lxml.etree : None
matplotlib : 3.1.2
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pytables : None
pytest : 4.1.1
pyxlsb : None
s3fs : None
scipy : 1.4.1
sqlalchemy : 1.2.8
tables : None
tabulate : 0.8.6
xarray : None
xlrd : None
xlwt : None
xlsxwriter : None
numba : None