Skip to content

Commit 8e9b3ee

Browse files
fujiaxiang authored and jreback committed
Bug groupby quantile listlike q and int columns (#30485)
1 parent f738581 commit 8e9b3ee

File tree

3 files changed

+43
-12
lines changed

3 files changed

+43
-12
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,7 @@ Groupby/resample/rolling
848848
- Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`)
849849
- Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`)
850850
- Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
851+
- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`)
851852

852853
Reshaping
853854
^^^^^^^^^

pandas/core/groupby/groupby.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -1937,21 +1937,22 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
19371937
# >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :]
19381938
# but this hits https://github.com/pandas-dev/pandas/issues/10710
19391939
# which doesn't reorder the list-like `q` on the inner level.
1940-
order = np.roll(list(range(result.index.nlevels)), -1)
1941-
result = result.reorder_levels(order)
1942-
result = result.reindex(q, level=-1)
1940+
order = list(range(1, result.index.nlevels)) + [0]
1941+
1942+
# temporarily saves the index names
1943+
index_names = np.array(result.index.names)
19431944

1944-
# fix order.
1945-
hi = len(q) * self.ngroups
1946-
arr = np.arange(0, hi, self.ngroups)
1947-
arrays = []
1945+
# set index names to positions to avoid confusion
1946+
result.index.names = np.arange(len(index_names))
1947+
1948+
# place quantiles on the inside
1949+
result = result.reorder_levels(order)
19481950

1949-
for i in range(self.ngroups):
1950-
arr2 = arr + i
1951-
arrays.append(arr2)
1951+
# restore the index names in order
1952+
result.index.names = index_names[order]
19521953

1953-
indices = np.concatenate(arrays)
1954-
assert len(indices) == len(result)
1954+
# reorder rows to keep things sorted
1955+
indices = np.arange(len(result)).reshape([len(q), self.ngroups]).T.flatten()
19551956
return result.take(indices)
19561957

19571958
@Substitution(name="groupby")

pandas/tests/groupby/test_function.py

+29
Original file line numberDiff line numberDiff line change
@@ -1398,6 +1398,35 @@ def test_quantile_array_multiple_levels():
13981398
tm.assert_frame_equal(result, expected)
13991399

14001400

1401+
@pytest.mark.parametrize("frame_size", [(2, 3), (100, 10)])
1402+
@pytest.mark.parametrize("groupby", [[0], [0, 1]])
1403+
@pytest.mark.parametrize("q", [[0.5, 0.6]])
1404+
def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, q):
1405+
# GH30289
1406+
nrow, ncol = frame_size
1407+
df = pd.DataFrame(
1408+
np.array([ncol * [_ % 4] for _ in range(nrow)]), columns=range(ncol)
1409+
)
1410+
1411+
idx_levels = [list(range(min(nrow, 4)))] * len(groupby) + [q]
1412+
idx_codes = [[x for x in range(min(nrow, 4)) for _ in q]] * len(groupby) + [
1413+
list(range(len(q))) * min(nrow, 4)
1414+
]
1415+
expected_index = pd.MultiIndex(
1416+
levels=idx_levels, codes=idx_codes, names=groupby + [None]
1417+
)
1418+
expected_values = [
1419+
[float(x)] * (ncol - len(groupby)) for x in range(min(nrow, 4)) for _ in q
1420+
]
1421+
expected_columns = [x for x in range(ncol) if x not in groupby]
1422+
expected = pd.DataFrame(
1423+
expected_values, index=expected_index, columns=expected_columns
1424+
)
1425+
result = df.groupby(groupby).quantile(q)
1426+
1427+
tm.assert_frame_equal(result, expected)
1428+
1429+
14011430
def test_quantile_raises():
14021431
df = pd.DataFrame(
14031432
[["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]

0 commit comments

Comments
 (0)