124124from pandas .core .dtypes .generic import (
125125 ABCDataFrame ,
126126 ABCDatetimeIndex ,
127+ ABCIntervalIndex ,
127128 ABCMultiIndex ,
128129 ABCPeriodIndex ,
129130 ABCSeries ,
@@ -3491,8 +3492,6 @@ def _intersection(self, other: Index, sort: bool = False):
34913492 and other .is_monotonic_increasing
34923493 and self ._can_use_libjoin
34933494 and other ._can_use_libjoin
3494- and not isinstance (self , ABCMultiIndex )
3495- and not isinstance (other , ABCMultiIndex )
34963495 ):
34973496 try :
34983497 res_indexer , indexer , _ = self ._inner_indexer (other )
@@ -4631,28 +4630,13 @@ def join(
46314630
46324631 _validate_join_method (how )
46334632
4634- if not self .is_unique and not other .is_unique :
4635- return self ._join_non_unique (other , how = how , sort = sort )
4636- elif not self .is_unique or not other .is_unique :
4637- if self .is_monotonic_increasing and other .is_monotonic_increasing :
4638- # Note: 2023-08-15 we *do* have tests that get here with
4639- # Categorical, string[python] (can use libjoin)
4640- # and Interval (cannot)
4641- if self ._can_use_libjoin and other ._can_use_libjoin :
4642- # otherwise we will fall through to _join_via_get_indexer
4643- # GH#39133
4644- # go through object dtype for ea till engine is supported properly
4645- return self ._join_monotonic (other , how = how )
4646- else :
4647- return self ._join_non_unique (other , how = how , sort = sort )
4648- elif (
4649- # GH48504: exclude MultiIndex to avoid going through MultiIndex._values
4650- self .is_monotonic_increasing
4633+ if (
4634+ not isinstance (self .dtype , CategoricalDtype )
4635+ and self .is_monotonic_increasing
46514636 and other .is_monotonic_increasing
46524637 and self ._can_use_libjoin
46534638 and other ._can_use_libjoin
4654- and not isinstance (self , ABCMultiIndex )
4655- and not isinstance (self .dtype , CategoricalDtype )
4639+ and (self .is_unique or other .is_unique )
46564640 ):
46574641 # A Categorical is monotonic if its data are ordered like its categories, but
46584642 # join cannot handle values that are not lexicographically monotonic GH#38502
@@ -4661,6 +4645,8 @@ def join(
46614645 except TypeError :
46624646 # object dtype; non-comparable objects
46634647 pass
4648+ elif not self .is_unique or not other .is_unique :
4649+ return self ._join_non_unique (other , how = how , sort = sort )
46644650
46654651 return self ._join_via_get_indexer (other , how , sort )
46664652
@@ -4796,6 +4782,9 @@ def _join_non_unique(
47964782 join_idx = self .take (left_idx )
47974783 right = other .take (right_idx )
47984784 join_index = join_idx .putmask (mask , right )
4785+ if isinstance (join_index , ABCMultiIndex ) and how == "outer" :
4785+ # re-sort MultiIndex levels for outer joins; see test_join_index_levels
4787+ join_index = join_index ._sort_levels_monotonic ()
47994788 return join_index , left_idx , right_idx
48004789
48014790 @final
@@ -5041,10 +5030,10 @@ def _can_use_libjoin(self) -> bool:
50415030 or isinstance (self ._values , (ArrowExtensionArray , BaseMaskedArray ))
50425031 or self .dtype == "string[python]"
50435032 )
5044- # For IntervalIndex, the conversion to numpy converts
5045- # to object dtype, which negates the performance benefit of libjoin
5046- # TODO: exclude RangeIndex and MultiIndex as these also make copies?
5047- return not isinstance (self . dtype , IntervalDtype )
5033+ # Exclude index types where the conversion to numpy converts to object dtype,
5034+ # which negates the performance benefit of libjoin
5035+ # TODO: exclude RangeIndex? Seems to break test_concat_datetime_timezone
5036+ return not isinstance (self , ( ABCIntervalIndex , ABCMultiIndex ) )
50485037
50495038 # --------------------------------------------------------------------
50505039 # Uncategorized Methods
@@ -5179,8 +5168,7 @@ def _get_join_target(self) -> np.ndarray:
51795168 # present
51805169 return self ._values .to_numpy ()
51815170
5182- # TODO: exclude ABCRangeIndex, ABCMultiIndex cases here as those create
5183- # copies.
5171+ # TODO: exclude ABCRangeIndex case here as it copies
51845172 target = self ._get_engine_target ()
51855173 if not isinstance (target , np .ndarray ):
51865174 raise ValueError ("_can_use_libjoin should return False." )
0 commit comments