Skip to content

DataFrameGroupBy.cumsum(axis=0) fails when grouped on axis=1 #21127

Closed
@kunalgosar

Description

@kunalgosar

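Code sample (copy-pastable example)

Calling `cumsum()` with the default `axis=0` on a DataFrame grouped along `axis=1` raises a `ValueError` instead of returning a result: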

In [3]: df = pd.DataFrame({'col1': [0, 1, 2, 3], 'col2': [4, 5, 6, 7],
   ...:                   'col3': [8, 9, 10, 11], 'col4': [12, 13, 14, 15],
   ...:                   'col5': [0, 0, 0, 0]})
   ...:                   

In [4]: df.groupby(by=[1, 2, 3, 1, 2], axis=1).cumsum()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/dev/pandas/pandas/core/internals.py in create_block_manager_from_arrays(arrays, names, axes)
   4872         blocks = form_blocks(arrays, names, axes)
-> 4873         mgr = BlockManager(blocks, axes)
   4874         mgr._consolidate_inplace()

~/dev/pandas/pandas/core/internals.py in __init__(self, blocks, axes, do_integrity_check)
   3281         if do_integrity_check:
-> 3282             self._verify_integrity()
   3283 

~/dev/pandas/pandas/core/internals.py in _verify_integrity(self)
   3492             if block._verify_integrity and block.shape[1:] != mgr_shape[1:]:
-> 3493                 construction_error(tot_items, block.shape[1:], self.axes)
   3494         if len(self.items) != tot_items:

~/dev/pandas/pandas/core/internals.py in construction_error(tot_items, block_shape, axes, e)
   4842     raise ValueError("Shape of passed values is {0}, indices imply {1}".format(
-> 4843         passed, implied))
   4844 

ValueError: Shape of passed values is (4, 5), indices imply (4, 4)

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-4-11b5865f27e2> in <module>()
----> 1 df.groupby(by=[1, 2, 3, 1, 2], axis=1).cumsum()

~/dev/pandas/pandas/core/groupby/groupby.py in cumsum(self, axis, *args, **kwargs)
   1926             return self.apply(lambda x: x.cumsum(axis=axis, **kwargs))
   1927 
-> 1928         return self._cython_transform('cumsum', **kwargs)
   1929 
   1930     @Substitution(name='groupby')

~/dev/pandas/pandas/core/groupby/groupby.py in _cython_transform(self, how, numeric_only, **kwargs)
   1036             raise DataError('No numeric types to aggregate')
   1037 
-> 1038         return self._wrap_transformed_output(output, names)
   1039 
   1040     def _cython_agg_general(self, how, alt=None, numeric_only=True,

~/dev/pandas/pandas/core/groupby/groupby.py in _wrap_transformed_output(self, output, names)
   4737 
   4738     def _wrap_transformed_output(self, output, names=None):
-> 4739         return DataFrame(output, index=self.obj.index)
   4740 
   4741     def _wrap_agged_blocks(self, items, blocks):

~/dev/pandas/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
    346                                  dtype=dtype, copy=copy)
    347         elif isinstance(data, dict):
--> 348             mgr = self._init_dict(data, index, columns, dtype=dtype)
    349         elif isinstance(data, ma.MaskedArray):
    350             import numpy.ma.mrecords as mrecords

~/dev/pandas/pandas/core/frame.py in _init_dict(self, data, index, columns, dtype)
    457             arrays = [data[k] for k in keys]
    458 
--> 459         return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
    460 
    461     def _init_ndarray(self, values, index, columns, dtype=None, copy=False):

~/dev/pandas/pandas/core/frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
   7321     axes = [_ensure_index(columns), _ensure_index(index)]
   7322 
-> 7323     return create_block_manager_from_arrays(arrays, arr_names, axes)
   7324 
   7325 

~/dev/pandas/pandas/core/internals.py in create_block_manager_from_arrays(arrays, names, axes)
   4875         return mgr
   4876     except ValueError as e:
-> 4877         construction_error(len(arrays), arrays[0].shape, axes, e)
   4878 
   4879 

~/dev/pandas/pandas/core/internals.py in construction_error(tot_items, block_shape, axes, e)
   4841         raise ValueError("Empty data passed with indices specified.")
   4842     raise ValueError("Shape of passed values is {0}, indices imply {1}".format(
-> 4843         passed, implied))
   4844 
   4845 

ValueError: Shape of passed values is (4, 5), indices imply (4, 4)

Output of pd.show_versions()

INSTALLED VERSIONS

commit: d7c6e01
python: 3.6.5.final.0
python-bits: 64
OS: Darwin
OS-release: 17.5.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8

pandas: 0.23.0rc2+30.gd7c6e0130
pytest: 3.5.1
pip: 10.0.1
setuptools: 39.1.0
Cython: 0.28.2
numpy: 1.14.2
scipy: 1.0.1
pyarrow: 0.9.0
xarray: 0.10.3
IPython: 6.3.1
sphinx: 1.7.4
patsy: 0.5.0
dateutil: 2.7.2
pytz: 2018.4
blosc: None
bottleneck: 1.2.1
tables: 3.4.3
numexpr: 2.6.5
feather: 0.4.0
matplotlib: 2.2.2
openpyxl: 2.5.3
xlrd: 1.1.0
xlwt: 1.2.0
xlsxwriter: 1.0.4
lxml: 4.2.1
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.7
pymysql: 0.8.0
psycopg2: None
jinja2: 2.10
s3fs: 0.1.4
fastparquet: 0.1.5
pandas_gbq: None
pandas_datareader: None

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions