Skip to content

Commit 53e83c7

Browse files
authored
BUG: DataFrame.stack with EA columns (#44401)
1 parent 91da75d commit 53e83c7

File tree

3 files changed

+29
-1
lines changed

3 files changed

+29
-1
lines changed

doc/source/whatsnew/v1.4.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,8 @@ Reshaping
623623
- Bug in :func:`crosstab` failing when inputs are lists or tuples (:issue:`44076`)
624624
- Bug in :meth:`DataFrame.append` failing to retain ``index.name`` when appending a list of :class:`Series` objects (:issue:`44109`)
625625
- Fixed metadata propagation in :meth:`DataFrame.apply` method, consequently fixing the same issue for :meth:`DataFrame.transform`, :meth:`DataFrame.nunique` and :meth:`DataFrame.mode` (:issue:`28283`)
626+
- Bug in :meth:`DataFrame.stack` with ``ExtensionDtype`` columns incorrectly raising (:issue:`43561`)
627+
-
626628

627629
Sparse
628630
^^^^^^

pandas/core/reshape/reshape.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -745,13 +745,15 @@ def _convert_level_number(level_num, columns):
745745
if frame._is_homogeneous_type and is_extension_array_dtype(
746746
frame.dtypes.iloc[0]
747747
):
748+
# TODO(EA2D): won't need special case, can go through .values
749+
# paths below (might change to ._values)
748750
dtype = this[this.columns[loc]].dtypes.iloc[0]
749751
subset = this[this.columns[loc]]
750752

751753
value_slice = dtype.construct_array_type()._concat_same_type(
752754
[x._values for _, x in subset.items()]
753755
)
754-
N, K = this.shape
756+
N, K = subset.shape
755757
idx = np.arange(N * K).reshape(K, N).T.ravel()
756758
value_slice = value_slice.take(idx)
757759

pandas/tests/frame/test_stack_unstack.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2099,3 +2099,27 @@ def test_stack_unsorted(self):
20992099
result = DF.stack(["VAR", "TYP"]).sort_index()
21002100
expected = DF.sort_index(axis=1).stack(["VAR", "TYP"]).sort_index()
21012101
tm.assert_series_equal(result, expected)
2102+
2103+
def test_stack_nullable_dtype(self):
2104+
# GH#43561
2105+
columns = MultiIndex.from_product(
2106+
[["54511", "54515"], ["r", "t_mean"]], names=["station", "element"]
2107+
)
2108+
index = Index([1, 2, 3], name="time")
2109+
2110+
arr = np.array([[50, 226, 10, 215], [10, 215, 9, 220], [305, 232, 111, 220]])
2111+
df = DataFrame(arr, columns=columns, index=index, dtype=pd.Int64Dtype())
2112+
2113+
result = df.stack("station")
2114+
2115+
expected = df.astype(np.int64).stack("station").astype(pd.Int64Dtype())
2116+
tm.assert_frame_equal(result, expected)
2117+
2118+
# non-homogeneous case
2119+
df[df.columns[0]] = df[df.columns[0]].astype(pd.Float64Dtype())
2120+
result = df.stack("station")
2121+
2122+
# TODO(EA2D): we get object dtype because DataFrame.values can't
2123+
# be an EA
2124+
expected = df.astype(object).stack("station")
2125+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)