Skip to content

Commit fe244ba

Browse files
authored
BUG: DataFrame.join inconsistently setting result index name (#56948)
* Index.join result name
* whatsnew
* update test
1 parent c3fc9bb commit fe244ba

File tree

4 files changed

+41
-35
lines changed

4 files changed

+41
-35
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,9 @@ Performance improvements
113113

114114
Bug fixes
115115
~~~~~~~~~
116+
- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
116117
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
117-
118+
-
118119

119120
Categorical
120121
^^^^^^^^^^^

pandas/core/indexes/base.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4711,6 +4711,10 @@ def _join_via_get_indexer(
47114711
except TypeError:
47124712
pass
47134713

4714+
names = other.names if how == "right" else self.names
4715+
if join_index.names != names:
4716+
join_index = join_index.set_names(names)
4717+
47144718
if join_index is self:
47154719
lindexer = None
47164720
else:
@@ -5017,7 +5021,7 @@ def _join_monotonic(
50175021
ridx = self._left_indexer_unique(other)
50185022
else:
50195023
join_array, lidx, ridx = self._left_indexer(other)
5020-
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5024+
join_index = self._wrap_joined_index(join_array, other, lidx, ridx, how)
50215025
elif how == "right":
50225026
if self.is_unique:
50235027
# We can perform much better than the general case
@@ -5026,13 +5030,13 @@ def _join_monotonic(
50265030
ridx = None
50275031
else:
50285032
join_array, ridx, lidx = other._left_indexer(self)
5029-
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5033+
join_index = self._wrap_joined_index(join_array, other, lidx, ridx, how)
50305034
elif how == "inner":
50315035
join_array, lidx, ridx = self._inner_indexer(other)
5032-
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5036+
join_index = self._wrap_joined_index(join_array, other, lidx, ridx, how)
50335037
elif how == "outer":
50345038
join_array, lidx, ridx = self._outer_indexer(other)
5035-
join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5039+
join_index = self._wrap_joined_index(join_array, other, lidx, ridx, how)
50365040

50375041
lidx = None if lidx is None else ensure_platform_int(lidx)
50385042
ridx = None if ridx is None else ensure_platform_int(ridx)
@@ -5044,21 +5048,22 @@ def _wrap_joined_index(
50445048
other: Self,
50455049
lidx: npt.NDArray[np.intp],
50465050
ridx: npt.NDArray[np.intp],
5051+
how: JoinHow,
50475052
) -> Self:
50485053
assert other.dtype == self.dtype
5049-
5054+
names = other.names if how == "right" else self.names
50505055
if isinstance(self, ABCMultiIndex):
5051-
name = self.names if self.names == other.names else None
50525056
# error: Incompatible return value type (got "MultiIndex",
50535057
# expected "Self")
50545058
mask = lidx == -1
50555059
join_idx = self.take(lidx)
50565060
right = cast("MultiIndex", other.take(ridx))
50575061
join_index = join_idx.putmask(mask, right)._sort_levels_monotonic()
5058-
return join_index.set_names(name) # type: ignore[return-value]
5062+
return join_index.set_names(names) # type: ignore[return-value]
50595063
else:
5060-
name = get_op_result_name(self, other)
5061-
return self._constructor._with_infer(joined, name=name, dtype=self.dtype)
5064+
return self._constructor._with_infer(
5065+
joined, name=names[0], dtype=self.dtype
5066+
)
50625067

50635068
@final
50645069
@cache_readonly

pandas/core/indexes/datetimelike.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575

7676
from pandas._typing import (
7777
Axis,
78+
JoinHow,
7879
Self,
7980
npt,
8081
)
@@ -735,10 +736,15 @@ def _get_join_freq(self, other):
735736
return freq
736737

737738
def _wrap_joined_index(
738-
self, joined, other, lidx: npt.NDArray[np.intp], ridx: npt.NDArray[np.intp]
739+
self,
740+
joined,
741+
other,
742+
lidx: npt.NDArray[np.intp],
743+
ridx: npt.NDArray[np.intp],
744+
how: JoinHow,
739745
):
740746
assert other.dtype == self.dtype, (other.dtype, self.dtype)
741-
result = super()._wrap_joined_index(joined, other, lidx, ridx)
747+
result = super()._wrap_joined_index(joined, other, lidx, ridx, how)
742748
result._data._freq = self._get_join_freq(other)
743749
return result
744750

pandas/tests/indexes/numeric/test_join.py

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,14 @@ def test_join_non_unique(self):
2121
tm.assert_numpy_array_equal(ridx, exp_ridx)
2222

2323
def test_join_inner(self):
24-
index = Index(range(0, 20, 2), dtype=np.int64)
25-
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64)
26-
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64)
24+
index = Index(range(0, 20, 2), dtype=np.int64, name="lhs")
25+
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64, name="rhs")
26+
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64, name="rhs")
2727

2828
# not monotonic
2929
res, lidx, ridx = index.join(other, how="inner", return_indexers=True)
3030

31-
# no guarantee of sortedness, so sort for comparison purposes
32-
ind = res.argsort()
33-
res = res.take(ind)
34-
lidx = lidx.take(ind)
35-
ridx = ridx.take(ind)
36-
37-
eres = Index([2, 12], dtype=np.int64)
31+
eres = Index([2, 12], dtype=np.int64, name="lhs")
3832
elidx = np.array([1, 6], dtype=np.intp)
3933
eridx = np.array([4, 1], dtype=np.intp)
4034

@@ -46,7 +40,7 @@ def test_join_inner(self):
4640
# monotonic
4741
res, lidx, ridx = index.join(other_mono, how="inner", return_indexers=True)
4842

49-
res2 = index.intersection(other_mono)
43+
res2 = index.intersection(other_mono).set_names(["lhs"])
5044
tm.assert_index_equal(res, res2)
5145

5246
elidx = np.array([1, 6], dtype=np.intp)
@@ -57,9 +51,9 @@ def test_join_inner(self):
5751
tm.assert_numpy_array_equal(ridx, eridx)
5852

5953
def test_join_left(self):
60-
index = Index(range(0, 20, 2), dtype=np.int64)
61-
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64)
62-
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64)
54+
index = Index(range(0, 20, 2), dtype=np.int64, name="lhs")
55+
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64, name="rhs")
56+
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64, name="rhs")
6357

6458
# not monotonic
6559
res, lidx, ridx = index.join(other, how="left", return_indexers=True)
@@ -80,20 +74,20 @@ def test_join_left(self):
8074
tm.assert_numpy_array_equal(ridx, eridx)
8175

8276
# non-unique
83-
idx = Index([1, 1, 2, 5])
84-
idx2 = Index([1, 2, 5, 7, 9])
77+
idx = Index([1, 1, 2, 5], name="rhs")
78+
idx2 = Index([1, 2, 5, 7, 9], name="lhs")
8579
res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True)
86-
eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2
80+
eres = Index([1, 1, 2, 5, 7, 9], name="lhs") # 1 is in idx2, so it should be x2
8781
eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp)
8882
elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp)
8983
tm.assert_index_equal(res, eres)
9084
tm.assert_numpy_array_equal(lidx, elidx)
9185
tm.assert_numpy_array_equal(ridx, eridx)
9286

9387
def test_join_right(self):
94-
index = Index(range(0, 20, 2), dtype=np.int64)
95-
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64)
96-
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64)
88+
index = Index(range(0, 20, 2), dtype=np.int64, name="lhs")
89+
other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64, name="rhs")
90+
other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64, name="rhs")
9791

9892
# not monotonic
9993
res, lidx, ridx = index.join(other, how="right", return_indexers=True)
@@ -115,10 +109,10 @@ def test_join_right(self):
115109
assert ridx is None
116110

117111
# non-unique
118-
idx = Index([1, 1, 2, 5])
119-
idx2 = Index([1, 2, 5, 7, 9])
112+
idx = Index([1, 1, 2, 5], name="lhs")
113+
idx2 = Index([1, 2, 5, 7, 9], name="rhs")
120114
res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True)
121-
eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2
115+
eres = Index([1, 1, 2, 5, 7, 9], name="rhs") # 1 is in idx2, so it should be x2
122116
elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp)
123117
eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp)
124118
tm.assert_index_equal(res, eres)

0 commit comments

Comments
 (0)