Skip to content

Commit 535383f

Browse files
committed
Merge branch 'main' into bug-expansion-dtypes
2 parents 55ca3ab + 5b10ba2 commit 535383f

File tree

14 files changed

+285
-44
lines changed

14 files changed

+285
-44
lines changed

doc/source/user_guide/10min.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,9 @@ Setting a new column automatically aligns the data by the indexes:
318318

319319
.. ipython:: python
320320
321-
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range("20130102", periods=6))
321+
s1 = pd.Series(
322+
[1, 2, 3, 4, 5, 6],
323+
index=pd.date_range("20130102", periods=6))
322324
s1
323325
df["F"] = s1
324326

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,6 +1057,7 @@ MultiIndex
10571057
- Bug in :func:`MultiIndex.get_level_values` where accessing a :class:`DatetimeIndex` level did not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
10581058
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
10591059
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
1060+
- Bug in :meth:`MultiIndex.union` raising when indexes have duplicates with differing names (:issue:`62059`)
10601061
- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`)
10611062
- Bug in :meth:`DataFrame.__setitem__` where column alignment logic would reindex the assigned value with an empty index, incorrectly setting all values to ``NaN`` (:issue:`61841`)
10621063
- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` where reindexing :class:`Index` to a :class:`MultiIndex` would incorrectly set all values to ``NaN`` (:issue:`60923`)

pandas/_config/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,7 @@ def option_context(*args) -> Generator[None]:
503503
)
504504

505505
ops = tuple(zip(args[::2], args[1::2], strict=True))
506+
undo: tuple[tuple[Any, Any], ...] = ()
506507
try:
507508
undo = tuple((pat, get_option(pat)) for pat, val in ops)
508509
for pat, val in ops:

pandas/core/arrays/sparse/array.py

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -85,23 +85,17 @@
8585

8686
from pandas.io.formats import printing
8787

88-
# See https://github.com/python/typing/issues/684
8988
if TYPE_CHECKING:
9089
from collections.abc import (
9190
Callable,
9291
Sequence,
9392
)
94-
from enum import Enum
93+
from types import EllipsisType
9594
from typing import (
9695
Protocol,
9796
type_check_only,
9897
)
9998

100-
class ellipsis(Enum):
101-
Ellipsis = "..."
102-
103-
Ellipsis = ellipsis.Ellipsis
104-
10599
from scipy.sparse import (
106100
csc_array,
107101
csc_matrix,
@@ -134,10 +128,6 @@ def tocsc(self, /) -> csc_array | csc_matrix: ...
134128
from pandas import Series
135129

136130

137-
else:
138-
ellipsis = type(Ellipsis)
139-
140-
141131
# ----------------------------------------------------------------------------
142132
# Array
143133

@@ -974,31 +964,22 @@ def __getitem__(self, key: ScalarIndexer) -> Any: ...
974964
@overload
975965
def __getitem__(
976966
self,
977-
key: SequenceIndexer | tuple[int | ellipsis, ...],
967+
key: SequenceIndexer | tuple[int | EllipsisType, ...],
978968
) -> Self: ...
979969

980970
def __getitem__(
981971
self,
982-
key: PositionalIndexer | tuple[int | ellipsis, ...],
972+
key: PositionalIndexer | tuple[int | EllipsisType, ...],
983973
) -> Self | Any:
984974
if isinstance(key, tuple):
985975
key = unpack_tuple_and_ellipses(key)
986-
if key is Ellipsis:
976+
if key is ...:
987977
raise ValueError("Cannot slice with Ellipsis")
988978

989979
if is_integer(key):
990980
return self._get_val_at(key)
991981
elif isinstance(key, tuple):
992-
# error: Invalid index type "Tuple[Union[int, ellipsis], ...]"
993-
# for "ndarray[Any, Any]"; expected type
994-
# "Union[SupportsIndex, _SupportsArray[dtype[Union[bool_,
995-
# integer[Any]]]], _NestedSequence[_SupportsArray[dtype[
996-
# Union[bool_, integer[Any]]]]], _NestedSequence[Union[
997-
# bool, int]], Tuple[Union[SupportsIndex, _SupportsArray[
998-
# dtype[Union[bool_, integer[Any]]]], _NestedSequence[
999-
# _SupportsArray[dtype[Union[bool_, integer[Any]]]]],
1000-
# _NestedSequence[Union[bool, int]]], ...]]"
1001-
data_slice = self.to_dense()[key] # type: ignore[index]
982+
data_slice = self.to_dense()[key]
1002983
elif isinstance(key, slice):
1003984
# Avoid densifying when handling contiguous slices
1004985
if key.step is None or key.step == 1:

pandas/core/dtypes/dtypes.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2319,8 +2319,35 @@ def kind(self) -> str:
23192319

23202320
@cache_readonly
23212321
def itemsize(self) -> int:
2322-
"""Return the number of bytes in this dtype"""
2323-
return self.numpy_dtype.itemsize
2322+
"""
2323+
Return the number of bytes in this dtype.
2324+
2325+
For Arrow-backed dtypes:
2326+
- Returns the fixed-width bit size divided by 8 for standard fixed-width types.
2327+
- For boolean types, returns the NumPy itemsize.
2328+
- Falls back to the NumPy dtype itemsize for variable-width & unsupported types.
2329+
2330+
Examples
2331+
--------
2332+
>>> import pyarrow as pa
2333+
>>> import pandas as pd
2334+
>>> dtype = pd.ArrowDtype(pa.int32())
2335+
>>> dtype.itemsize
2336+
4
2337+
2338+
>>> dtype = pd.ArrowDtype(pa.bool_())
2339+
>>> dtype.itemsize # falls back to numpy dtype
2340+
1
2341+
"""
2342+
if pa.types.is_boolean(self.pyarrow_dtype):
2343+
return self.numpy_dtype.itemsize
2344+
2345+
# Use pyarrow itemsize for fixed-width data types
2346+
# e.g. int32 -> 32 bits // 8 = 4 bytes
2347+
try:
2348+
return self.pyarrow_dtype.bit_width // 8
2349+
except (ValueError, AttributeError, NotImplementedError):
2350+
return self.numpy_dtype.itemsize
23242351

23252352
def construct_array_type(self) -> type_t[ArrowExtensionArray]:
23262353
"""

pandas/core/generic.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6102,10 +6102,15 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
61026102
"""
61036103
Propagate metadata from other to self.
61046104
6105+
This is the default implementation. Subclasses may override this method to
6106+
implement their own metadata handling.
6107+
61056108
Parameters
61066109
----------
61076110
other : the object from which to get the attributes that we are going
6108-
to propagate
6111+
to propagate. If ``other`` has an ``input_objs`` attribute, then
6112+
this attribute must contain an iterable of objects, each with an
6113+
``attrs`` attribute.
61096114
method : str, optional
61106115
A passed method name providing context on where ``__finalize__``
61116116
was called.
@@ -6114,6 +6119,12 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
61146119
61156120
The values passed as `method` are not currently considered
61166121
stable across pandas releases.
6122+
6123+
Notes
6124+
-----
6125+
In case ``other`` has an ``input_objs`` attribute, this method only
6126+
propagates its metadata if each object in ``input_objs`` has the exact
6127+
same metadata as the others.
61176128
"""
61186129
if isinstance(other, NDFrame):
61196130
if other.attrs:

pandas/core/indexes/multi.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3946,7 +3946,9 @@ def _union(self, other, sort) -> MultiIndex:
39463946
if other.has_duplicates:
39473947
# This is only necessary if other has dupes,
39483948
# otherwise difference is faster
3949-
result = super()._union(other, sort)
3949+
result = super(MultiIndex, self.rename(result_names))._union(
3950+
other.rename(result_names), sort
3951+
)
39503952

39513953
if isinstance(result, MultiIndex):
39523954
return result

pandas/core/nanops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1366,7 +1366,7 @@ def nankurt(
13661366
# With a few modifications, like using the maximum value instead of the averages
13671367
# and some adaptations because they use the average and we use the sum for `m2`.
13681368
# We need to estimate an upper bound to the error to consider the data constant.
1369-
# Lets call:
1369+
# Let's call:
13701370
# x: true value in data
13711371
# y: floating point representation
13721372
# e: relative approximation error
@@ -1377,7 +1377,7 @@ def nankurt(
13771377
# (|x - y|/|x|)² <= e²
13781378
# Σ (|x - y|/|x|)² <= ne²
13791379
#
1380-
# Lets say that the fperr upper bound for m2 is constrained by the summation.
1380+
# Let's say that the fperr upper bound for m2 is constrained by the summation.
13811381
# |m2 - y|/|m2| <= ne²
13821382
# |m2 - y| <= n|m2|e²
13831383
#

pandas/core/reshape/merge.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,16 +1129,15 @@ def _reindex_and_concat(
11291129
return result
11301130

11311131
def get_result(self) -> DataFrame:
1132+
"""
1133+
Execute the merge.
1134+
"""
11321135
if self.indicator:
11331136
self.left, self.right = self._indicator_pre_merge(self.left, self.right)
11341137

11351138
join_index, left_indexer, right_indexer = self._get_join_info()
11361139

11371140
result = self._reindex_and_concat(join_index, left_indexer, right_indexer)
1138-
result = result.__finalize__(
1139-
types.SimpleNamespace(input_objs=[self.left, self.right]),
1140-
method=self._merge_type,
1141-
)
11421141

11431142
if self.indicator:
11441143
result = self._indicator_post_merge(result)
@@ -1167,6 +1166,13 @@ def _indicator_name(self) -> str | None:
11671166
def _indicator_pre_merge(
11681167
self, left: DataFrame, right: DataFrame
11691168
) -> tuple[DataFrame, DataFrame]:
1169+
"""
1170+
Add one indicator column to each of the left and right inputs.
1171+
1172+
These columns are used to produce another column in the output of the
1173+
merge, indicating for each row of the output whether it was produced
1174+
using the left, right or both inputs.
1175+
"""
11701176
columns = left.columns.union(right.columns)
11711177

11721178
for i in ["_left_indicator", "_right_indicator"]:
@@ -1193,6 +1199,12 @@ def _indicator_pre_merge(
11931199

11941200
@final
11951201
def _indicator_post_merge(self, result: DataFrame) -> DataFrame:
1202+
"""
1203+
Add an indicator column to the merge result.
1204+
1205+
This column indicates for each row of the output whether it was produced using
1206+
the left, right or both inputs.
1207+
"""
11961208
result["_left_indicator"] = result["_left_indicator"].fillna(0)
11971209
result["_right_indicator"] = result["_right_indicator"].fillna(0)
11981210

pandas/tests/config/test_config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,3 +491,9 @@ def test_no_silent_downcasting_deprecated():
491491
cf.get_option("future.no_silent_downcasting")
492492
with tm.assert_produces_warning(Pandas4Warning, match="is deprecated"):
493493
cf.set_option("future.no_silent_downcasting", True)
494+
495+
496+
def test_option_context_invalid_option():
497+
with pytest.raises(OptionError, match="No such keys"):
498+
with cf.option_context("invalid", True):
499+
pass

0 commit comments

Comments
 (0)