Skip to content

BUG: ValueError: ndarray is not C-contiguous for cummax on nullable dtypes #61031

Open
@MarcoGorelli

Description

@MarcoGorelli

Pandas version checks

  • I have checked that this issue has not already been reported.

  • I have confirmed this bug exists on the latest version of pandas.

  • I have confirmed this bug exists on the main branch of pandas.

Reproducible Example

In [5]: df = pd.DataFrame({'a': [1,1,2], 'b': [4,5,6], 'i': [0,1,2]}, dtype='Int64')[::-1]

In [6]: df.groupby('a')['b'].cummax()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[6], line 1
----> 1 df.groupby('a')['b'].cummax()

File ~/pandas-dev/pandas/core/groupby/groupby.py:4945, in GroupBy.cummax(self, numeric_only, **kwargs)
   4886 """
   4887 Cumulative max for each group.
   4888
   (...)
   4942 bull    6   9
   4943 """
   4944 skipna = kwargs.get("skipna", True)
-> 4945 return self._cython_transform(
   4946     "cummax", numeric_only=numeric_only, skipna=skipna
   4947 )

File ~/pandas-dev/pandas/core/groupby/generic.py:684, in SeriesGroupBy._cython_transform(self, how, numeric_only, **kwargs)
    681 obj = self._obj_with_exclusions
    683 try:
--> 684     result = self._grouper._cython_operation(
    685         "transform", obj._values, how, 0, **kwargs
    686     )
    687 except NotImplementedError as err:
    688     # e.g. test_groupby_raises_string
    689     raise TypeError(f"{how} is not supported for {obj.dtype} dtype") from err

File ~/pandas-dev/pandas/core/groupby/ops.py:932, in BaseGrouper._cython_operation(self, kind, values, how, axis, min_count, **kwargs)
    928 assert kind in ["transform", "aggregate"]
    930 cy_op = WrappedCythonOp(kind=kind, how=how, has_dropped_na=self.has_dropped_na)
--> 932 return cy_op.cython_operation(
    933     values=values,
    934     axis=axis,
    935     min_count=min_count,
    936     comp_ids=self.ids,
    937     ngroups=self.ngroups,
    938     **kwargs,
    939 )

File ~/pandas-dev/pandas/core/groupby/ops.py:546, in WrappedCythonOp.cython_operation(self, values, axis, min_count, comp_ids, ngroups, **kwargs)
    542 self._validate_axis(axis, values)
    544 if not isinstance(values, np.ndarray):
    545     # i.e. ExtensionArray
--> 546     return values._groupby_op(
    547         how=self.how,
    548         has_dropped_na=self.has_dropped_na,
    549         min_count=min_count,
    550         ngroups=ngroups,
    551         ids=comp_ids,
    552         **kwargs,
    553     )
    555 return self._cython_op_ndim_compat(
    556     values,
    557     min_count=min_count,
   (...)
    561     **kwargs,
    562 )

File ~/pandas-dev/pandas/core/arrays/masked.py:1602, in BaseMaskedArray._groupby_op(self, how, has_dropped_na, min_count, ngroups, ids, **kwargs)
   1599 if how == "rank" and kwargs.get("na_option") in ["top", "bottom"]:
   1600     result_mask[:] = False
-> 1602 res_values = op._cython_op_ndim_compat(
   1603     self._data,
   1604     min_count=min_count,
   1605     ngroups=ngroups,
   1606     comp_ids=ids,
   1607     mask=mask,
   1608     result_mask=result_mask,
   1609     **kwargs,
   1610 )
   1612 if op.how == "ohlc":
   1613     arity = op._cython_arity.get(op.how, 1)

File ~/pandas-dev/pandas/core/groupby/ops.py:331, in WrappedCythonOp._cython_op_ndim_compat(self, values, min_count, ngroups, comp_ids, mask, result_mask, **kwargs)
    329 if result_mask is not None:
    330     result_mask = result_mask[None, :]
--> 331 res = self._call_cython_op(
    332     values2d,
    333     min_count=min_count,
    334     ngroups=ngroups,
    335     comp_ids=comp_ids,
    336     mask=mask,
    337     result_mask=result_mask,
    338     **kwargs,
    339 )
    340 if res.shape[0] == 1:
    341     return res[0]

File ~/pandas-dev/pandas/core/groupby/ops.py:477, in WrappedCythonOp._call_cython_op(self, values, min_count, ngroups, comp_ids, mask, result_mask, **kwargs)
    474     if self.how != "rank":
    475         # TODO: should rank take result_mask?
    476         kwargs["result_mask"] = result_mask
--> 477     func(
    478         out=result,
    479         values=values,
    480         labels=comp_ids,
    481         ngroups=ngroups,
    482         is_datetimelike=is_datetimelike,
    483         mask=mask,
    484         **kwargs,
    485     )
    487 if self.kind == "aggregate" and self.how not in ["idxmin", "idxmax"]:
    488     # i.e. counts is defined.  Locations where count<min_count
    489     # need to have the result set to np.nan, which may require casting,
    490     # see GH#40767. For idxmin/idxmax is handled specially via post-processing
    491     if result.dtype.kind in "iu" and not is_datetimelike:
    492         # if the op keeps the int dtypes, we have to use 0

File groupby.pyx:2287, in pandas._libs.groupby.group_cummax()

File <stringsource>:663, in View.MemoryView.memoryview_cwrapper()

File <stringsource>:353, in View.MemoryView.memoryview.__cinit__()

ValueError: ndarray is not C-contiguous

Issue Description

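`groupby(...).cummax()` raises `ValueError: ndarray is not C-contiguous` for nullable integer dtypes (e.g. `Int64`) in this case, where the DataFrame has been reversed with `[::-1]` before grouping.

Note that the same operation works fine for pyarrow-backed integers.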

Expected Behavior

2    6
1    5
0    5
Name: b, dtype: Int64

Installed Versions

INSTALLED VERSIONS

commit : b552dc9
python : 3.10.12
python-bits : 64
OS : Linux
OS-release : 5.15.167.4-microsoft-standard-WSL2
Version : #1 SMP Tue Nov 5 00:21:55 UTC 2024
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : C.UTF-8
LOCALE : en_US.UTF-8

pandas : 3.0.0.dev0+618.gb552dc95c9
numpy : 1.26.4
dateutil : 2.9.0.post0
pip : 25.0
Cython : 3.0.11
sphinx : 8.1.3
IPython : 8.32.0
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : 4.13.3
blosc : None
bottleneck : 1.4.2
fastparquet : 2024.11.0
fsspec : 2025.2.0
html5lib : 1.1
hypothesis : 6.125.2
gcsfs : 2025.2.0
jinja2 : 3.1.5
lxml.etree : 5.3.0
matplotlib : 3.10.0
numba : 0.61.0
numexpr : 2.10.2
odfpy : None
openpyxl : 3.1.5
psycopg2 : 2.9.10
pymysql : 1.4.6
pyarrow : 19.0.0
pyreadstat : 1.2.8
pytest : 8.3.4
python-calamine : None
pytz : 2025.1
pyxlsb : 1.0.10
s3fs : 2025.2.0
scipy : 1.15.1
sqlalchemy : 2.0.37
tables : 3.10.1
tabulate : 0.9.0
xarray : 2024.9.0
xlrd : 2.0.1
xlsxwriter : 3.2.2
zstandard : 0.23.0
tzdata : 2025.1
qtpy : None
pyqt5 : None

Spotted in Narwhals

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions