124
124
from pandas .core .dtypes .generic import (
125
125
ABCDataFrame ,
126
126
ABCDatetimeIndex ,
127
+ ABCIntervalIndex ,
127
128
ABCMultiIndex ,
128
129
ABCPeriodIndex ,
129
130
ABCSeries ,
@@ -3491,8 +3492,6 @@ def _intersection(self, other: Index, sort: bool = False):
3491
3492
and other .is_monotonic_increasing
3492
3493
and self ._can_use_libjoin
3493
3494
and other ._can_use_libjoin
3494
- and not isinstance (self , ABCMultiIndex )
3495
- and not isinstance (other , ABCMultiIndex )
3496
3495
):
3497
3496
try :
3498
3497
res_indexer , indexer , _ = self ._inner_indexer (other )
@@ -4631,28 +4630,13 @@ def join(
4631
4630
4632
4631
_validate_join_method (how )
4633
4632
4634
- if not self .is_unique and not other .is_unique :
4635
- return self ._join_non_unique (other , how = how , sort = sort )
4636
- elif not self .is_unique or not other .is_unique :
4637
- if self .is_monotonic_increasing and other .is_monotonic_increasing :
4638
- # Note: 2023-08-15 we *do* have tests that get here with
4639
- # Categorical, string[python] (can use libjoin)
4640
- # and Interval (cannot)
4641
- if self ._can_use_libjoin and other ._can_use_libjoin :
4642
- # otherwise we will fall through to _join_via_get_indexer
4643
- # GH#39133
4644
- # go through object dtype for ea till engine is supported properly
4645
- return self ._join_monotonic (other , how = how )
4646
- else :
4647
- return self ._join_non_unique (other , how = how , sort = sort )
4648
- elif (
4649
- # GH48504: exclude MultiIndex to avoid going through MultiIndex._values
4650
- self .is_monotonic_increasing
4633
+ if (
4634
+ not isinstance (self .dtype , CategoricalDtype )
4635
+ and self .is_monotonic_increasing
4651
4636
and other .is_monotonic_increasing
4652
4637
and self ._can_use_libjoin
4653
4638
and other ._can_use_libjoin
4654
- and not isinstance (self , ABCMultiIndex )
4655
- and not isinstance (self .dtype , CategoricalDtype )
4639
+ and (self .is_unique or other .is_unique )
4656
4640
):
4657
4641
# Categorical is monotonic if data are ordered as categories, but join can
4658
4642
# not handle this in case of not lexicographically monotonic GH#38502
@@ -4661,6 +4645,8 @@ def join(
4661
4645
except TypeError :
4662
4646
# object dtype; non-comparable objects
4663
4647
pass
4648
+ elif not self .is_unique or not other .is_unique :
4649
+ return self ._join_non_unique (other , how = how , sort = sort )
4664
4650
4665
4651
return self ._join_via_get_indexer (other , how , sort )
4666
4652
@@ -4796,6 +4782,9 @@ def _join_non_unique(
4796
4782
join_idx = self .take (left_idx )
4797
4783
right = other .take (right_idx )
4798
4784
join_index = join_idx .putmask (mask , right )
4785
+ if isinstance (join_index , ABCMultiIndex ) and how == "outer" :
4786
+ # test_join_index_levels
4787
+ join_index = join_index ._sort_levels_monotonic ()
4799
4788
return join_index , left_idx , right_idx
4800
4789
4801
4790
@final
@@ -5041,10 +5030,10 @@ def _can_use_libjoin(self) -> bool:
5041
5030
or isinstance (self ._values , (ArrowExtensionArray , BaseMaskedArray ))
5042
5031
or self .dtype == "string[python]"
5043
5032
)
5044
- # For IntervalIndex, the conversion to numpy converts
5045
- # to object dtype, which negates the performance benefit of libjoin
5046
- # TODO: exclude RangeIndex and MultiIndex as these also make copies?
5047
- return not isinstance (self . dtype , IntervalDtype )
5033
+ # Exclude index types where the conversion to numpy converts to object dtype,
5034
+ # which negates the performance benefit of libjoin
5035
+ # TODO: exclude RangeIndex? Seems to break test_concat_datetime_timezone
5036
+ return not isinstance (self , ( ABCIntervalIndex , ABCMultiIndex ) )
5048
5037
5049
5038
# --------------------------------------------------------------------
5050
5039
# Uncategorized Methods
@@ -5179,8 +5168,7 @@ def _get_join_target(self) -> np.ndarray:
5179
5168
# present
5180
5169
return self ._values .to_numpy ()
5181
5170
5182
- # TODO: exclude ABCRangeIndex, ABCMultiIndex cases here as those create
5183
- # copies.
5171
+ # TODO: exclude ABCRangeIndex case here as it copies
5184
5172
target = self ._get_engine_target ()
5185
5173
if not isinstance (target , np .ndarray ):
5186
5174
raise ValueError ("_can_use_libjoin should return False." )
0 commit comments