Skip to content

Commit b070359

Browse files
phofl
authored and
im-vinicius
committed
PERF: Performance regression in Groupby.apply with group_keys=True (pandas-dev#53195)
1 parent c8bde1d commit b070359

File tree

2 files changed

+15
-10
lines changed

2 files changed

+15
-10
lines changed

doc/source/whatsnew/v2.0.2.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ including other versions of pandas.
1313

1414
Fixed regressions
1515
~~~~~~~~~~~~~~~~~
16+
- Fixed performance regression in :meth:`GroupBy.apply` (:issue:`53195`)
1617
- Fixed regression in :func:`read_sql` dropping columns with duplicated column names (:issue:`53117`)
1718
- Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`)
1819
- Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`)

pandas/core/reshape/concat.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ def _clean_keys_and_objs(
532532
keys = type(keys).from_tuples(clean_keys, names=keys.names)
533533
else:
534534
name = getattr(keys, "name", None)
535-
keys = Index(clean_keys, name=name)
535+
keys = Index(clean_keys, name=name, dtype=getattr(keys, "dtype", None))
536536

537537
if len(objs) == 0:
538538
raise ValueError("All objects passed were None")
@@ -806,15 +806,19 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
806806

807807
for hlevel, level in zip(zipped, levels):
808808
to_concat = []
809-
for key, index in zip(hlevel, indexes):
810-
# Find matching codes, include matching nan values as equal.
811-
mask = (isna(level) & isna(key)) | (level == key)
812-
if not mask.any():
813-
raise ValueError(f"Key {key} not in level {level}")
814-
i = np.nonzero(mask)[0][0]
815-
816-
to_concat.append(np.repeat(i, len(index)))
817-
codes_list.append(np.concatenate(to_concat))
809+
if isinstance(hlevel, Index) and hlevel.equals(level):
810+
lens = [len(idx) for idx in indexes]
811+
codes_list.append(np.repeat(np.arange(len(hlevel)), lens))
812+
else:
813+
for key, index in zip(hlevel, indexes):
814+
# Find matching codes, include matching nan values as equal.
815+
mask = (isna(level) & isna(key)) | (level == key)
816+
if not mask.any():
817+
raise ValueError(f"Key {key} not in level {level}")
818+
i = np.nonzero(mask)[0][0]
819+
820+
to_concat.append(np.repeat(i, len(index)))
821+
codes_list.append(np.concatenate(to_concat))
818822

819823
concat_index = _concat_indexes(indexes)
820824

0 commit comments

Comments
 (0)