Skip to content

ValueError on df.agg if a list of functions is given #31851

Open
@endremborza

Description

@endremborza

Code Sample

import numpy as np
import pandas as pd

# Fixture data for the reproduction: column A ends with a NaN,
# column B has no missing values.
_arr = np.array([1,2,3,4,5,np.nan])
_arr2 = np.array([1,2,3,4,5,7])

df = pd.DataFrame({"A": _arr, "B": _arr2})

def perc_fun(q):
    """Return a callable that computes the ``q``-th percentile, skipping NaNs.

    The returned callable takes a single array-like argument and delegates
    to ``np.nanpercentile``. Its ``__name__`` is set to ``"p-<q>"`` so that
    each generated function carries a distinct, descriptive name.
    """
    label = f"p-{q}"

    def f(arr):
        return np.nanpercentile(arr, q)

    f.__name__ = label
    return f

Everything works as expected here:

perc_fun(10)(_arr)
>>> 1.4

df.agg(perc_fun(10))
>>> A    1.4
>>> B    1.5
>>> dtype: float64

But this raises an exception:

df.agg(["mean", perc_fun(10)])

TypeError Traceback (most recent call last)
~/.local/lib/python3.7/site-packages/pandas/core/base.py in _aggregate_multiple_funcs(self, arg, _axis)
553 try:
--> 554 return concat(results, keys=keys, axis=1, sort=False)
555 except TypeError:

~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
280 copy=copy,
--> 281 sort=sort,
282 )

~/.local/lib/python3.7/site-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)
356 )
--> 357 raise TypeError(msg)
358

TypeError: cannot concatenate object of type '<class 'numpy.float64'>'; only Series and DataFrame objs are valid

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last)
in
----> 1 df.agg(["mean", perc_fun(10)])

~/.local/lib/python3.7/site-packages/pandas/core/frame.py in aggregate(self, func, axis, *args, **kwargs)
6704 result = None
6705 try:
-> 6706 result, how = self._aggregate(func, axis=axis, *args, **kwargs)
6707 except TypeError:
6708 pass

~/.local/lib/python3.7/site-packages/pandas/core/frame.py in _aggregate(self, arg, axis, *args, **kwargs)
6718 result = result.T if result is not None else result
6719 return result, how
-> 6720 return super()._aggregate(arg, *args, **kwargs)
6721
6722 agg = aggregate

~/.local/lib/python3.7/site-packages/pandas/core/base.py in _aggregate(self, arg, *args, **kwargs)
484 elif is_list_like(arg):
485 # we require a list, but not an 'str'
--> 486 return self._aggregate_multiple_funcs(arg, _axis=_axis), None
487 else:
488 result = None

~/.local/lib/python3.7/site-packages/pandas/core/base.py in _aggregate_multiple_funcs(self, arg, _axis)
530 colg = self._gotitem(col, ndim=1, subset=obj.iloc[:, index])
531 try:
--> 532 new_res = colg.aggregate(arg)
533 except (TypeError, DataError):
534 pass

~/.local/lib/python3.7/site-packages/pandas/core/series.py in aggregate(self, func, axis, *args, **kwargs)
3686 # Validate the axis parameter
3687 self._get_axis_number(axis)
-> 3688 result, how = self._aggregate(func, *args, **kwargs)
3689 if result is None:
3690

~/.local/lib/python3.7/site-packages/pandas/core/base.py in _aggregate(self, arg, *args, **kwargs)
484 elif is_list_like(arg):
485 # we require a list, but not an 'str'
--> 486 return self._aggregate_multiple_funcs(arg, _axis=_axis), None
487 else:
488 result = None

~/.local/lib/python3.7/site-packages/pandas/core/base.py in _aggregate_multiple_funcs(self, arg, _axis)
562 result = Series(results, index=keys, name=self.name)
563 if is_nested_object(result):
--> 564 raise ValueError("cannot combine transform and aggregation operations")
565 return result
566

ValueError: cannot combine transform and aggregation operations

Problem description

The same code produced a different error in 0.24, so I assume some work has been done on this. However, the error message is not very informative, and I really think this should just work: the result should contain the appropriate values (1.4 for column A and 1.5 for column B) alongside the means.

I couldn't find any related issues. If no one has experience with this, I will look into the .agg code myself.

Output of pd.show_versions()

INSTALLED VERSIONS

commit : None
python : 3.7.4.final.0
python-bits : 64
OS : Linux
OS-release : 5.2.1-1.el7.elrepo.x86_64
machine : x86_64
processor :
byteorder : little
LC_ALL : None
LANG : C.UTF-8
LOCALE : en_US.UTF-8

pandas : 1.0.1
numpy : 1.18.1
pytz : 2018.4
dateutil : 2.7.3
pip : 19.3.1
setuptools : 41.4.0
Cython : None
pytest : 4.1.1
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : 1.0.1
pymysql : None
psycopg2 : 2.8.1 (dt dec pq3 ext lo64)
jinja2 : 2.10
IPython : 7.9.0
pandas_datareader: None
bs4 : None
bottleneck : None
fastparquet : None
gcsfs : None
lxml.etree : None
matplotlib : 3.1.2
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pytables : None
pytest : 4.1.1
pyxlsb : None
s3fs : None
scipy : 1.4.1
sqlalchemy : 1.2.8
tables : None
tabulate : 0.8.6
xarray : None
xlrd : None
xlwt : None
xlsxwriter : None
numba : None

Metadata

Metadata

Assignees

No one assigned

    Labels

    Apply (Apply, Aggregate, Transform, Map), Bug

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions