124124from pandas .core .dtypes .generic import (
125125 ABCDataFrame ,
126126 ABCDatetimeIndex ,
127+ ABCIntervalIndex ,
127128 ABCMultiIndex ,
128129 ABCPeriodIndex ,
129130 ABCSeries ,
@@ -3492,8 +3493,6 @@ def _intersection(self, other: Index, sort: bool = False):
34923493 and other .is_monotonic_increasing
34933494 and self ._can_use_libjoin
34943495 and other ._can_use_libjoin
3495- and not isinstance (self , ABCMultiIndex )
3496- and not isinstance (other , ABCMultiIndex )
34973496 ):
34983497 try :
34993498 res_indexer , indexer , _ = self ._inner_indexer (other )
@@ -4632,28 +4631,13 @@ def join(
46324631
46334632 _validate_join_method (how )
46344633
4635- if not self .is_unique and not other .is_unique :
4636- return self ._join_non_unique (other , how = how , sort = sort )
4637- elif not self .is_unique or not other .is_unique :
4638- if self .is_monotonic_increasing and other .is_monotonic_increasing :
4639- # Note: 2023-08-15 we *do* have tests that get here with
4640- # Categorical, string[python] (can use libjoin)
4641- # and Interval (cannot)
4642- if self ._can_use_libjoin and other ._can_use_libjoin :
4643- # otherwise we will fall through to _join_via_get_indexer
4644- # GH#39133
4645- # go through object dtype for ea till engine is supported properly
4646- return self ._join_monotonic (other , how = how )
4647- else :
4648- return self ._join_non_unique (other , how = how , sort = sort )
4649- elif (
4650- # GH48504: exclude MultiIndex to avoid going through MultiIndex._values
4651- self .is_monotonic_increasing
4634+ if (
4635+ not isinstance (self .dtype , CategoricalDtype )
4636+ and self .is_monotonic_increasing
46524637 and other .is_monotonic_increasing
46534638 and self ._can_use_libjoin
46544639 and other ._can_use_libjoin
4655- and not isinstance (self , ABCMultiIndex )
4656- and not isinstance (self .dtype , CategoricalDtype )
4640+ and (self .is_unique or other .is_unique )
46574641 ):
46584642 # Categorical is monotonic if data are ordered as categories, but join can
46594643 # not handle this in case of not lexicographically monotonic GH#38502
@@ -4662,6 +4646,8 @@ def join(
46624646 except TypeError :
46634647 # object dtype; non-comparable objects
46644648 pass
4649+ elif not self .is_unique or not other .is_unique :
4650+ return self ._join_non_unique (other , how = how , sort = sort )
46654651
46664652 return self ._join_via_get_indexer (other , how , sort )
46674653
@@ -4797,6 +4783,9 @@ def _join_non_unique(
47974783 join_idx = self .take (left_idx )
47984784 right = other .take (right_idx )
47994785 join_index = join_idx .putmask (mask , right )
4786+ if isinstance (join_index , ABCMultiIndex ) and how == "outer" :
4787+ # sort levels monotonically for outer joins; see test_join_index_levels
4788+ join_index = join_index ._sort_levels_monotonic ()
48004789 return join_index , left_idx , right_idx
48014790
48024791 @final
@@ -5042,10 +5031,10 @@ def _can_use_libjoin(self) -> bool:
50425031 or isinstance (self ._values , (ArrowExtensionArray , BaseMaskedArray ))
50435032 or self .dtype == "string[python]"
50445033 )
5045- # For IntervalIndex, the conversion to numpy converts
5046- # to object dtype, which negates the performance benefit of libjoin
5047- # TODO: exclude RangeIndex and MultiIndex as these also make copies?
5048- return not isinstance (self . dtype , IntervalDtype )
5034+ # Exclude index types where the conversion to numpy converts to object dtype,
5035+ # which negates the performance benefit of libjoin
5036+ # TODO: exclude RangeIndex? Seems to break test_concat_datetime_timezone
5037+ return not isinstance (self , ( ABCIntervalIndex , ABCMultiIndex ) )
50495038
50505039 # --------------------------------------------------------------------
50515040 # Uncategorized Methods
@@ -5180,8 +5169,7 @@ def _get_join_target(self) -> np.ndarray:
51805169 # present
51815170 return self ._values .to_numpy ()
51825171
5183- # TODO: exclude ABCRangeIndex, ABCMultiIndex cases here as those create
5184- # copies.
5172+ # TODO: exclude the ABCRangeIndex case here, since it creates a copy
51855173 target = self ._get_engine_target ()
51865174 if not isinstance (target , np .ndarray ):
51875175 raise ValueError ("_can_use_libjoin should return False." )
0 commit comments