Skip to content

Commit b061f7e

Browse files
author
test
committed
A concise summary of the fix
1 parent 6e6cb21 commit b061f7e

File tree

2 files changed

+43
-5
lines changed

2 files changed

+43
-5
lines changed

.gitconfig

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[user]
2+
email = test@example.com
3+
name = test
4+
[pull]
5+
rebase = false
6+
[push]
7+
default = simple

pandas/core/reshape/merge.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1928,6 +1928,25 @@ def _validate_left_right_on(self, left_on, right_on):
19281928

19291929
return left_on, right_on
19301930

1931+
def _get_dupes(self, keys: list["ArrayLike"]) -> list:
1932+
from pandas import MultiIndex
1933+
1934+
multi_index = MultiIndex.from_arrays(keys)
1935+
dupes = multi_index[multi_index.duplicated()].unique()
1936+
if isinstance(dupes, MultiIndex):
1937+
return dupes.to_list()
1938+
return dupes.tolist()
1939+
1940+
1941+
def _get_dupes(self, keys: list["ArrayLike"]) -> list:
1942+
from pandas import MultiIndex
1943+
1944+
multi_index = MultiIndex.from_arrays(keys)
1945+
dupes = multi_index[multi_index.duplicated()].unique()
1946+
if isinstance(dupes, MultiIndex):
1947+
return dupes.to_list()
1948+
return dupes.tolist()
1949+
19311950
@final
19321951
def _validate_validate_kwd(self, validate: str) -> None:
19331952
# Check uniqueness of each
@@ -1944,30 +1963,42 @@ def _validate_validate_kwd(self, validate: str) -> None:
19441963
# Check data integrity
19451964
if validate in ["one_to_one", "1:1"]:
19461965
if not left_unique and not right_unique:
1966+
left_dupes = self._get_dupes(self.left_join_keys)
1967+
right_dupes = self._get_dupes(self.right_join_keys)
19471968
raise MergeError(
19481969
"Merge keys are not unique in either left "
1949-
"or right dataset; not a one-to-one merge"
1970+
"or right dataset; not a one-to-one merge. "
1971+
f"Left duplicate keys: {left_dupes}. "
1972+
f"Right duplicate keys: {right_dupes}."
19501973
)
19511974
if not left_unique:
1975+
left_dupes = self._get_dupes(self.left_join_keys)
19521976
raise MergeError(
1953-
"Merge keys are not unique in left dataset; not a one-to-one merge"
1977+
"Merge keys are not unique in left dataset; not a one-to-one merge. "
1978+
f"Duplicate keys: {left_dupes}."
19541979
)
19551980
if not right_unique:
1981+
right_dupes = self._get_dupes(self.right_join_keys)
19561982
raise MergeError(
1957-
"Merge keys are not unique in right dataset; not a one-to-one merge"
1983+
"Merge keys are not unique in right dataset; not a one-to-one merge. "
1984+
f"Duplicate keys: {right_dupes}."
19581985
)
19591986

19601987
elif validate in ["one_to_many", "1:m"]:
19611988
if not left_unique:
1989+
left_dupes = self._get_dupes(self.left_join_keys)
19621990
raise MergeError(
1963-
"Merge keys are not unique in left dataset; not a one-to-many merge"
1991+
"Merge keys are not unique in left dataset; not a one-to-many merge. "
1992+
f"Duplicate keys: {left_dupes}."
19641993
)
19651994

19661995
elif validate in ["many_to_one", "m:1"]:
19671996
if not right_unique:
1997+
right_dupes = self._get_dupes(self.right_join_keys)
19681998
raise MergeError(
19691999
"Merge keys are not unique in right dataset; "
1970-
"not a many-to-one merge"
2000+
"not a many-to-one merge. "
2001+
f"Duplicate keys: {right_dupes}."
19712002
)
19722003

19732004
elif validate in ["many_to_many", "m:m"]:

0 commit comments

Comments
 (0)