Skip to content

Commit 2244402

Browse files
authored
PERF: Only clear cached .levels when setting MultiIndex.names (#59578)
* PERF: Only clear cached .levels when setting MultiIndex.names
* whatsnew number
1 parent db1b8ab commit 2244402

File tree

3 files changed

+29
-13
lines changed

3 files changed

+29
-13
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,7 @@ Performance improvements
503503
- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`)
504504
- :meth:`Series.str.partition` with :class:`ArrowDtype` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57768`)
505505
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
506+
- Performance improvement in :class:`MultiIndex` when setting :attr:`MultiIndex.names`, which no longer invalidates all cached operations (:issue:`59578`)
506507
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
507508
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
508509
- Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`)

pandas/core/indexes/multi.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -799,7 +799,7 @@ def dtypes(self) -> Series:
799799
"""
800800
from pandas import Series
801801

802-
names = com.fill_missing_names([level.name for level in self.levels])
802+
names = com.fill_missing_names(self.names)
803803
return Series([level.dtype for level in self.levels], index=Index(names))
804804

805805
def __len__(self) -> int:
@@ -1572,7 +1572,7 @@ def _format_multi(
15721572
def _get_names(self) -> FrozenList:
15731573
return FrozenList(self._names)
15741574

1575-
def _set_names(self, names, *, level=None, validate: bool = True) -> None:
1575+
def _set_names(self, names, *, level=None) -> None:
15761576
"""
15771577
Set new names on index. Each name has to be a hashable type.
15781578
@@ -1583,8 +1583,6 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None:
15831583
level : int, level name, or sequence of int/level names (default None)
15841584
If the index is a MultiIndex (hierarchical), level(s) to set (None
15851585
for all levels). Otherwise level must be None.
1586-
validate : bool, default True
1587-
validate that the names match level lengths
15881586
15891587
Raises
15901588
------
@@ -1603,13 +1601,12 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None:
16031601
raise ValueError("Names should be list-like for a MultiIndex")
16041602
names = list(names)
16051603

1606-
if validate:
1607-
if level is not None and len(names) != len(level):
1608-
raise ValueError("Length of names must match length of level.")
1609-
if level is None and len(names) != self.nlevels:
1610-
raise ValueError(
1611-
"Length of names must match number of levels in MultiIndex."
1612-
)
1604+
if level is not None and len(names) != len(level):
1605+
raise ValueError("Length of names must match length of level.")
1606+
if level is None and len(names) != self.nlevels:
1607+
raise ValueError(
1608+
"Length of names must match number of levels in MultiIndex."
1609+
)
16131610

16141611
if level is None:
16151612
level = range(self.nlevels)
@@ -1627,8 +1624,9 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None:
16271624
)
16281625
self._names[lev] = name
16291626

1630-
# If .levels has been accessed, the names in our cache will be stale.
1631-
self._reset_cache()
1627+
# If .levels has been accessed, the .name of each level in our cache
1628+
# will be stale.
1629+
self._reset_cache("levels")
16321630

16331631
names = property(
16341632
fset=_set_names,

pandas/tests/indexing/multiindex/test_chaining_and_caching.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pandas import (
66
DataFrame,
77
MultiIndex,
8+
RangeIndex,
89
Series,
910
)
1011
import pandas._testing as tm
@@ -68,3 +69,19 @@ def test_indexer_caching(monkeypatch):
6869
s[s == 0] = 1
6970
expected = Series(np.ones(size_cutoff), index=index)
7071
tm.assert_series_equal(s, expected)
72+
73+
74+
def test_set_names_only_clears_level_cache():
75+
mi = MultiIndex.from_arrays([range(4), range(4)], names=["a", "b"])
76+
mi.dtypes
77+
mi.is_monotonic_increasing
78+
mi._engine
79+
mi.levels
80+
old_cache_keys = sorted(mi._cache.keys())
81+
assert old_cache_keys == ["_engine", "dtypes", "is_monotonic_increasing", "levels"]
82+
mi.names = ["A", "B"]
83+
new_cache_keys = sorted(mi._cache.keys())
84+
assert new_cache_keys == ["_engine", "dtypes", "is_monotonic_increasing"]
85+
new_levels = mi.levels
86+
tm.assert_index_equal(new_levels[0], RangeIndex(4, name="A"))
87+
tm.assert_index_equal(new_levels[1], RangeIndex(4, name="B"))

0 commit comments

Comments
 (0)