@@ -1954,42 +1954,65 @@ def _validate_left_right_on(self, left_on, right_on):
19541954 def _validate_validate_kwd (self , validate : str ) -> None :
19551955 # Check uniqueness of each
19561956 if self .left_index :
1957- left_unique = self .orig_left .index . is_unique
1957+ left_keys = self .orig_left .index
19581958 else :
1959- left_unique = MultiIndex .from_arrays (self .left_join_keys ). is_unique
1959+ left_keys = MultiIndex .from_arrays (self .left_join_keys )
19601960
19611961 if self .right_index :
1962- right_unique = self .orig_right .index . is_unique
1962+ right_keys = self .orig_right .index
19631963 else :
1964- right_unique = MultiIndex .from_arrays (self .right_join_keys ).is_unique
1964+ right_keys = MultiIndex .from_arrays (self .right_join_keys )
1965+
1966+ left_unique = left_keys .is_unique
1967+ right_unique = right_keys .is_unique
1968+
def sample_duplicates(keys, limit=10):
    """Return up to ``limit`` distinct key values that occur more than once.

    Parameters
    ----------
    keys : Index or list-like
        Merge keys to inspect. The input is coerced with ``Index(keys)``.
    limit : int, default 10
        Maximum number of distinct duplicated keys to return.

    Returns
    -------
    list
        Distinct duplicated keys, ordered by where their first repeat
        appears in ``keys``. Empty when every key is unique.
    """
    keys = Index(keys)
    # ``duplicated()`` flags every occurrence after the first, so a key that
    # appears three times is selected twice here; ``unique()`` collapses it.
    repeated = keys[keys.duplicated()]
    # Slicing an empty Index yields an empty Index, so no separate
    # "no duplicates" branch is needed.
    return list(repeated.unique()[:limit])
19651976
19661977 # Check data integrity
19671978 if validate in ["one_to_one" , "1:1" ]:
19681979 if not left_unique and not right_unique :
1980+ combined_keys = list (left_keys .append (right_keys ))
1981+ sample = sample_duplicates (combined_keys , limit = 10 )
19691982 raise MergeError (
19701983 "Merge keys are not unique in either left "
19711984 "or right dataset; not a one-to-one merge"
1985+ f"Offending keys (sample): { sample } "
19721986 )
19731987 if not left_unique :
1988+ sample = sample_duplicates (left_keys )
19741989 raise MergeError (
19751990 "Merge keys are not unique in left dataset; not a one-to-one merge"
1991+ f"Offending keys (sample): { sample } "
19761992 )
19771993 if not right_unique :
1994+ sample = sample_duplicates (right_keys )
19781995 raise MergeError (
19791996 "Merge keys are not unique in right dataset; not a one-to-one merge"
1997+ f"Offending keys (sample): { sample } "
19801998 )
19811999
19822000 elif validate in ["one_to_many" , "1:m" ]:
19832001 if not left_unique :
2002+ sample = sample_duplicates (left_keys )
19842003 raise MergeError (
19852004 "Merge keys are not unique in left dataset; not a one-to-many merge"
2005+ f"Offending keys (sample): { sample } "
2006+
19862007 )
19872008
19882009 elif validate in ["many_to_one" , "m:1" ]:
19892010 if not right_unique :
2011+ sample = sample_duplicates (right_keys )
19902012 raise MergeError (
19912013 "Merge keys are not unique in right dataset; "
19922014 "not a many-to-one merge"
2015+ f"Offending keys (sample): { sample } "
19932016 )
19942017
19952018 elif validate in ["many_to_many" , "m:m" ]:
0 commit comments