Skip to content

Commit 7137897

Browse files
Merge branch 'main' into main
2 parents 96110e9 + 36f5e25 commit 7137897

File tree

10 files changed

+93
-26
lines changed

10 files changed

+93
-26
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,7 @@ Interval
10491049
- Bug in :class:`Index`, :class:`Series`, :class:`DataFrame` constructors when given a sequence of :class:`Interval` subclass objects casting them to :class:`Interval` (:issue:`46945`)
10501050
- Bug in :func:`interval_range` where start and end numeric types were always cast to 64 bit (:issue:`57268`)
10511051
- Bug in :meth:`IntervalIndex.get_indexer` and :meth:`IntervalIndex.drop` when one of the sides of the index is non-unique (:issue:`52245`)
1052+
- Construction of :class:`IntervalArray` and :class:`IntervalIndex` from arrays with mismatched signed/unsigned integer dtypes (e.g., ``int64`` and ``uint64``) now raises a :exc:`TypeError` instead of proceeding silently. (:issue:`55715`)
10521053

10531054
Indexing
10541055
^^^^^^^^
@@ -1236,6 +1237,7 @@ Other
12361237
- Bug in :meth:`DataFrame.query` where using duplicate column names led to a ``TypeError``. (:issue:`59950`)
12371238
- Bug in :meth:`DataFrame.query` which raised an exception or produced incorrect results when expressions contained backtick-quoted column names containing the hash character ``#``, backticks, or characters that fall outside the ASCII range (U+0001..U+007F). (:issue:`59285`) (:issue:`49633`)
12381239
- Bug in :meth:`DataFrame.query` which raised an exception when querying integer column names using backticks. (:issue:`60494`)
1240+
- Bug in :meth:`DataFrame.rename` and :meth:`Series.rename` when passed a ``mapper``, ``index``, or ``columns`` argument that is a :class:`Series` with a non-unique index, which produced a corrupted result instead of raising ``ValueError`` (:issue:`58621`)
12391241
- Bug in :meth:`DataFrame.sample` with ``replace=False`` and ``(n * max(weights) / sum(weights)) > 1`` where the method would return biased results. Now raises ``ValueError``. (:issue:`61516`)
12401242
- Bug in :meth:`DataFrame.shift` where passing a ``freq`` on a DataFrame with no columns did not shift the index correctly. (:issue:`60102`)
12411243
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning :class:`RangeIndex` columns (:issue:`57293`)

pandas/core/arrays/interval.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,18 @@ def _ensure_simple_new_inputs(
420420

421421
dtype = IntervalDtype(left.dtype, closed=closed)
422422

423+
# Check for mismatched signed/unsigned integer dtypes after casting
424+
left_dtype = left.dtype
425+
right_dtype = right.dtype
426+
if (
427+
left_dtype.kind in "iu"
428+
and right_dtype.kind in "iu"
429+
and left_dtype.kind != right_dtype.kind
430+
):
431+
raise TypeError(
432+
f"Left and right arrays must have matching signedness. "
433+
f"Got {left_dtype} and {right_dtype}."
434+
)
423435
return left, right, dtype
424436

425437
@classmethod

pandas/core/generic.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,6 +1054,10 @@ def _rename(
10541054
if level is not None:
10551055
level = ax._get_level_number(level)
10561056

1057+
if isinstance(replacements, ABCSeries) and not replacements.index.is_unique:
1058+
# GH#58621
1059+
raise ValueError("Cannot rename with a Series with non-unique index.")
1060+
10571061
# GH 13473
10581062
if not callable(replacements):
10591063
if ax._is_multi and level is not None:

pandas/core/indexes/range.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,13 @@ class RangeIndex(Index):
8888
8989
Parameters
9090
----------
91-
start : int (default: 0), range, or other RangeIndex instance
91+
start : int, range, or other RangeIndex instance, default None
9292
If int and "stop" is not given, interpreted as "stop" instead.
93-
stop : int (default: 0)
93+
stop : int, default None
9494
The end value of the range (exclusive).
95-
step : int (default: 1)
95+
step : int, default None
9696
The step size of the range.
97-
dtype : np.int64
97+
dtype : np.int64, default None
9898
Unused, accepted for homogeneity with other index types.
9999
copy : bool, default False
100100
Unused, accepted for homogeneity with other index types.

pandas/tests/arrays/sparse/test_constructors.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,10 @@ def test_constructor_object_dtype_bool_fill(self):
7777
assert arr.dtype == SparseDtype(object, False)
7878
assert arr.fill_value is False
7979
arr_expected = np.array(data, dtype=object)
80-
it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected))
80+
it = (
81+
type(x) == type(y) and x == y
82+
for x, y in zip(arr, arr_expected, strict=True)
83+
)
8184
assert np.fromiter(it, dtype=np.bool_).all()
8285

8386
@pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int])

pandas/tests/arrays/test_datetimes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def test_iter(self, dta):
144144
def test_astype_object(self, dta):
145145
result = dta.astype(object)
146146
assert all(x._creso == dta._creso for x in result)
147-
assert all(x == y for x, y in zip(result, dta))
147+
assert all(x == y for x, y in zip(result, dta, strict=True))
148148

149149
def test_to_pydatetime(self, dta_dti):
150150
dta, dti = dta_dti

pandas/tests/extension/base/setitem.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -221,28 +221,28 @@ def test_setitem_integer_array_with_repeats(self, data, idx, box_in_series):
221221
tm.assert_equal(arr, expected)
222222

223223
@pytest.mark.parametrize(
224-
"idx, box_in_series",
224+
"idx",
225225
[
226-
([0, 1, 2, pd.NA], False),
227-
pytest.param(
228-
[0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
229-
),
230-
(pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
231-
# TODO: change False to True?
232-
(pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), # noqa: PT014
226+
[0, 1, 2, pd.NA],
227+
pd.array([0, 1, 2, pd.NA], dtype="Int64"),
233228
],
234-
ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
229+
ids=["list", "integer-array"],
235230
)
231+
@pytest.mark.parametrize("box_in_series", [True, False])
236232
def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
237233
arr = data.copy()
238234

239-
# TODO(xfail) this raises KeyError about labels not found (it tries label-based)
240-
# for list of labels with Series
235+
msg = "Cannot index with an integer indexer containing NA values"
236+
err = ValueError
237+
241238
if box_in_series:
239+
# The integer labels are not present in the (string) index, so
240+
# we get KeyErrors
242241
arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])
242+
msg = "0"
243+
err = KeyError
243244

244-
msg = "Cannot index with an integer indexer containing NA values"
245-
with pytest.raises(ValueError, match=msg):
245+
with pytest.raises(err, match=msg):
246246
arr[idx] = arr[0]
247247

248248
@pytest.mark.parametrize("as_callable", [True, False])

pandas/tests/extension/json/test_json.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -356,18 +356,25 @@ def test_setitem_integer_array(self, data, idx, box_in_series, request):
356356
request.applymarker(mark)
357357
super().test_setitem_integer_array(data, idx, box_in_series)
358358

359-
@pytest.mark.xfail(reason="list indices must be integers or slices, not NAType")
360359
@pytest.mark.parametrize(
361-
"idx, box_in_series",
360+
"idx",
361+
[
362+
[0, 1, 2, pd.NA],
363+
pd.array([0, 1, 2, pd.NA], dtype="Int64"),
364+
],
365+
ids=["list", "integer-array"],
366+
)
367+
@pytest.mark.parametrize(
368+
"box_in_series",
362369
[
363-
([0, 1, 2, pd.NA], False),
370+
True,
364371
pytest.param(
365-
[0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
372+
False,
373+
marks=pytest.mark.xfail(
374+
reason="list indices must be integers or slices, not NAType"
375+
),
366376
),
367-
(pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
368-
(pd.array([0, 1, 2, pd.NA], dtype="Int64"), True),
369377
],
370-
ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
371378
)
372379
def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
373380
super().test_setitem_integer_with_missing_raises(data, idx, box_in_series)

pandas/tests/frame/methods/test_rename.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
DataFrame,
99
Index,
1010
MultiIndex,
11+
Series,
1112
merge,
1213
)
1314
import pandas._testing as tm
@@ -409,3 +410,33 @@ def test_rename_boolean_index(self):
409410
index=["foo", "bar", "bah"],
410411
)
411412
tm.assert_frame_equal(res, exp)
413+
414+
def test_rename_non_unique_index_series(self):
415+
# GH#58621
416+
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
417+
orig = df.copy(deep=True)
418+
419+
rename_series = Series(["X", "Y", "Z", "W"], index=["A", "B", "B", "C"])
420+
421+
msg = "Cannot rename with a Series with non-unique index"
422+
with pytest.raises(ValueError, match=msg):
423+
df.rename(rename_series)
424+
with pytest.raises(ValueError, match=msg):
425+
df.rename(columns=rename_series)
426+
with pytest.raises(ValueError, match=msg):
427+
df.rename(columns=rename_series, inplace=True)
428+
429+
# check we didn't corrupt the original
430+
tm.assert_frame_equal(df, orig)
431+
432+
# Check the Series method while we're here
433+
ser = df.iloc[0]
434+
with pytest.raises(ValueError, match=msg):
435+
ser.rename(rename_series)
436+
with pytest.raises(ValueError, match=msg):
437+
ser.rename(index=rename_series)
438+
with pytest.raises(ValueError, match=msg):
439+
ser.rename(index=rename_series, inplace=True)
440+
441+
# check we didn't corrupt the original
442+
tm.assert_series_equal(ser, orig.iloc[0])

pandas/tests/indexes/interval/test_interval.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,14 @@ def test_is_all_dates(self):
882882
assert not year_2017_index._is_all_dates
883883

884884

885+
def test_from_arrays_mismatched_signedness_raises():
886+
# GH 55715
887+
left = np.array([0, 1, 2], dtype="int64")
888+
right = np.array([1, 2, 3], dtype="uint64")
889+
with pytest.raises(TypeError, match="matching signedness"):
890+
IntervalIndex.from_arrays(left, right)
891+
892+
885893
def test_dir():
886894
# GH#27571 dir(interval_index) should not raise
887895
index = IntervalIndex.from_arrays([0, 1], [1, 2])

0 commit comments

Comments
 (0)