Skip to content

Commit d954167

Browse files
committed
Improve implementation & add another test
1 parent ed78f3d commit d954167

File tree

3 files changed

+34
-5
lines changed

3 files changed

+34
-5
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,6 +1183,7 @@ Groupby/resample/rolling
11831183

11841184
Reshaping
11851185
^^^^^^^^^
1186+
- Bug in :meth:`DataFrame.join` not producing the correct row order when joining with a list of Series/DataFrames (:issue:`62954`)
11861187
- Bug in :func:`concat` with mixed integer and bool dtypes incorrectly casting the bools to integers (:issue:`45101`)
11871188
- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
11881189
- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)

pandas/core/frame.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11389,17 +11389,18 @@ def join(
1138911389
# "Iterable[Union[DataFrame, Series]]" due to the if statements
1139011390
frames = [cast("DataFrame | Series", self)] + list(other)
1139111391

11392-
can_concat = (how != "right") and all(df.index.is_unique for df in frames)
11392+
can_concat = all(df.index.is_unique for df in frames)
1139311393

1139411394
# join indexes only using concat
1139511395
if can_concat:
11396-
if how == "left":
11396+
if how == "left" or how == "right":
1139711397
res = concat(
1139811398
frames, axis=1, join="outer", verify_integrity=True, sort=sort
1139911399
)
11400-
result = res.reindex(self.index)
11400+
index = self.index if how == "left" else frames[-1].index
1140111401
if sort:
11402-
result = result.sort_index()
11402+
index = index.sort_values()
11403+
result = res.reindex(index)
1140311404
return result
1140411405
else:
1140511406
if how == "outer":

pandas/tests/reshape/merge/test_join.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ def _check_diff_index(df_list, result, exp_index):
672672
df_list[0].join(df_list[1:], on="a")
673673

674674
@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"])
675-
def test_join_many_sort(self, how, sort):
675+
def test_join_many_sort_unique(self, how, sort):
676676
df = DataFrame({"a": [1, 2, 3]}, index=[1, 0, 2])
677677
df2 = DataFrame({"b": [4, 5, 6]}, index=[2, 0, 1])
678678
if how == "right":
@@ -685,6 +685,33 @@ def test_join_many_sort(self, how, sort):
685685
result = df.join([df2], how=how, sort=sort)
686686
tm.assert_frame_equal(result, expected)
687687

688+
@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"])
689+
def test_join_many_sort_nonunique(self, how, sort):
690+
df = DataFrame({"a": [1, 2, 3]}, index=[3, 0, 0])
691+
df2 = DataFrame({"b": [4, 5, 6]}, index=[2, 0, 1])
692+
if how == "inner":
693+
expected = DataFrame({"a": [2, 3], "b": [5, 5]}, index=[0, 0])
694+
elif how == "left":
695+
expected = DataFrame(
696+
{"a": [1, 2, 3], "b": [np.nan, 5.0, 5.0]}, index=[3, 0, 0]
697+
)
698+
elif how == "right":
699+
expected = DataFrame(
700+
{"a": [np.nan, 2.0, 3.0, np.nan], "b": [4, 5, 5, 6]}, index=[2, 0, 0, 1]
701+
)
702+
else:
703+
expected = DataFrame(
704+
{
705+
"a": [2.0, 3.0, np.nan, np.nan, 1.0],
706+
"b": [5.0, 5.0, 6.0, 4.0, np.nan],
707+
},
708+
index=[0, 0, 1, 2, 3],
709+
)
710+
if sort:
711+
expected = expected.sort_index()
712+
result = df.join([df2], how=how, sort=sort)
713+
tm.assert_frame_equal(result, expected)
714+
688715
def test_join_many_mixed(self):
689716
df = DataFrame(
690717
np.random.default_rng(2).standard_normal((8, 4)),

0 commit comments

Comments
 (0)