Skip to content

Aggregating over arrays #3788

Closed
Closed
@hayd

Description

@hayd

Storing arrays inside DataFrames is frowned upon, but is there a reason why aggregating over such a column raises?

Deleting the lines that raise seems to break only the tests that check that the exception is raised...

df = pd.DataFrame([[1,np.array([10,20,30])],
               [1,np.array([40,50,60])], 
               [2,np.array([20,30,40])],], columns=['category','arraydata'])
g = df.groupby('category')
g.agg(sum)
---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
<ipython-input-34-527a2010b455> in <module>()
----> 1 g.agg(sum)

/Users/andy/pandas/pandas/core/groupby.py in agg(self, func, *args, **kwargs)
    337     @Appender(_agg_doc)
    338     def agg(self, func, *args, **kwargs):
--> 339         return self.aggregate(func, *args, **kwargs)
    340
    341     def _iterate_slices(self):

/Users/andy/pandas/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
   1740             cyfunc = _intercept_cython(arg)
   1741             if cyfunc and not args and not kwargs:
-> 1742                 return getattr(self, cyfunc)()
   1743
   1744             if self.grouper.nkeys > 1:

/Users/andy/pandas/pandas/core/groupby.py in f(self)
     62             raise SpecificationError(str(e))
     63         except Exception:
---> 64             result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
     65             if _convert:
     66                 result = result.convert_objects()

/Users/andy/pandas/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
   1745                 return self._python_agg_general(arg, *args, **kwargs)
   1746             else:
-> 1747                 result = self._aggregate_generic(arg, *args, **kwargs)
   1748
   1749         if not self.as_index:

/Users/andy/pandas/pandas/core/groupby.py in _aggregate_generic(self, func, *args, **kwargs)
   1803                     result[name] = self._try_cast(func(data, *args, **kwargs),data)
   1804             except Exception:
-> 1805                 return self._aggregate_item_by_item(func, *args, **kwargs)
   1806         else:
   1807             for name in self.indices:

/Users/andy/pandas/pandas/core/groupby.py in _aggregate_item_by_item(self, func, *args, **kwargs)
   1828                 colg = SeriesGroupBy(obj[item], selection=item,
   1829                                      grouper=self.grouper)
-> 1830                 result[item] = colg.aggregate(func, *args, **kwargs)
   1831             except ValueError:
   1832                 cannot_agg.append(item)

/Users/andy/pandas/pandas/core/groupby.py in aggregate(self, func_or_funcs, *args, **kwargs)
   1425                 return self._python_agg_general(func_or_funcs, *args, **kwargs)
   1426             except Exception:
-> 1427                 result = self._aggregate_named(func_or_funcs, *args, **kwargs)
   1428
   1429             index = Index(sorted(result), name=self.grouper.names[0])

/Users/andy/pandas/pandas/core/groupby.py in _aggregate_named(self, func, *args, **kwargs)
   1509             output = func(group, *args, **kwargs)
   1510             if isinstance(output, np.ndarray):
-> 1511                 raise Exception('Must produce aggregated value')
   1512             result[name] = self._try_cast(output, group)
   1513

Exception: Must produce aggregated value

http://stackoverflow.com/questions/16975318/pandas-aggregate-when-column-contains-numpy-arrays

Metadata

Metadata

Assignees

No one assigned

    Labels

    Groupby, Testing (pandas testing functions or related to the test suite)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions