Closed
Description
Storing arrays inside a DataFrame is frowned upon, but is there a reason for raising `Exception: Must produce aggregated value` when an aggregation function returns a NumPy array?
Deleting the lines that raise seems to break only the tests that check for the raise...
df = pd.DataFrame([[1,np.array([10,20,30])],
[1,np.array([40,50,60])],
[2,np.array([20,30,40])],], columns=['category','arraydata'])
g = df.groupby('category')
g.agg(sum)
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-34-527a2010b455> in <module>()
----> 1 g.agg(sum)
/Users/andy/pandas/pandas/core/groupby.py in agg(self, func, *args, **kwargs)
337 @Appender(_agg_doc)
338 def agg(self, func, *args, **kwargs):
--> 339 return self.aggregate(func, *args, **kwargs)
340
341 def _iterate_slices(self):
/Users/andy/pandas/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
1740 cyfunc = _intercept_cython(arg)
1741 if cyfunc and not args and not kwargs:
-> 1742 return getattr(self, cyfunc)()
1743
1744 if self.grouper.nkeys > 1:
/Users/andy/pandas/pandas/core/groupby.py in f(self)
62 raise SpecificationError(str(e))
63 except Exception:
---> 64 result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
65 if _convert:
66 result = result.convert_objects()
/Users/andy/pandas/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
1745 return self._python_agg_general(arg, *args, **kwargs)
1746 else:
-> 1747 result = self._aggregate_generic(arg, *args, **kwargs)
1748
1749 if not self.as_index:
/Users/andy/pandas/pandas/core/groupby.py in _aggregate_generic(self, func, *args, **kwargs)
1803 result[name] = self._try_cast(func(data, *args, **kwargs),data)
1804 except Exception:
-> 1805 return self._aggregate_item_by_item(func, *args, **kwargs)
1806 else:
1807 for name in self.indices:
/Users/andy/pandas/pandas/core/groupby.py in _aggregate_item_by_item(self, func, *args, **kwargs)
1828 colg = SeriesGroupBy(obj[item], selection=item,
1829 grouper=self.grouper)
-> 1830 result[item] = colg.aggregate(func, *args, **kwargs)
1831 except ValueError:
1832 cannot_agg.append(item)
/Users/andy/pandas/pandas/core/groupby.py in aggregate(self, func_or_funcs, *args, **kwargs)
1425 return self._python_agg_general(func_or_funcs, *args, **kwargs)
1426 except Exception:
-> 1427 result = self._aggregate_named(func_or_funcs, *args, **kwargs)
1428
1429 index = Index(sorted(result), name=self.grouper.names[0])
/Users/andy/pandas/pandas/core/groupby.py in _aggregate_named(self, func, *args, **kwargs)
1509 output = func(group, *args, **kwargs)
1510 if isinstance(output, np.ndarray):
-> 1511 raise Exception('Must produce aggregated value')
1512 result[name] = self._try_cast(output, group)
1513
Exception: Must produce aggregated value
http://stackoverflow.com/questions/16975318/pandas-aggregate-when-column-contains-numpy-arrays