Skip to content

Commit 0a20c3b

Browse files
Merge branch 'main' into doc/clarify-parentheses-vs-brackets-62314
2 parents 1580059 + 4f4b108 commit 0a20c3b

File tree

17 files changed

+152
-32
lines changed

17 files changed

+152
-32
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,6 +1083,7 @@ Reshaping
10831083
- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
10841084
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
10851085
- Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
1086+
- Bug in :meth:`DataFrame.unstack` raising an error when the index contains ``NaN`` and ``sort=False`` (:issue:`61221`)
10861087
- Bug in :meth:`DataFrame.merge` when merging two :class:`DataFrame` on ``intc`` or ``uintc`` types on Windows (:issue:`60091`, :issue:`58713`)
10871088
- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
10881089
- Bug in :meth:`DataFrame.pivot_table` incorrectly ignoring the ``values`` argument when also supplied to the ``index`` or ``columns`` parameters (:issue:`57876`, :issue:`61292`)

environment.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ dependencies:
9191
- sphinx
9292
- sphinx-design
9393
- sphinx-copybutton
94+
95+
# static typing
96+
- scipy-stubs
9497
- types-python-dateutil
9598
- types-PyMySQL
9699
- types-pytz

pandas/_testing/asserters.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -584,19 +584,13 @@ def raise_assert_detail(
584584

585585
if isinstance(left, np.ndarray):
586586
left = pprint_thing(left)
587-
elif isinstance(left, (CategoricalDtype, NumpyEADtype)):
587+
elif isinstance(left, (CategoricalDtype, StringDtype, NumpyEADtype)):
588588
left = repr(left)
589-
elif isinstance(left, StringDtype):
590-
# TODO(infer_string) this special case could be avoided if we have
591-
# a more informative repr https://github.com/pandas-dev/pandas/issues/59342
592-
left = f"StringDtype(storage={left.storage}, na_value={left.na_value})"
593589

594590
if isinstance(right, np.ndarray):
595591
right = pprint_thing(right)
596-
elif isinstance(right, (CategoricalDtype, NumpyEADtype)):
592+
elif isinstance(right, (CategoricalDtype, StringDtype, NumpyEADtype)):
597593
right = repr(right)
598-
elif isinstance(right, StringDtype):
599-
right = f"StringDtype(storage={right.storage}, na_value={right.na_value})"
600594

601595
msg += f"""
602596
[left]: {left}

pandas/core/arrays/boolean.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
lib,
1515
missing as libmissing,
1616
)
17+
from pandas.util._decorators import set_module
1718

1819
from pandas.core.dtypes.common import is_list_like
1920
from pandas.core.dtypes.dtypes import register_extension_dtype
@@ -39,6 +40,7 @@
3940

4041

4142
@register_extension_dtype
43+
@set_module("pandas")
4244
class BooleanDtype(BaseMaskedDtype):
4345
"""
4446
Extension dtype for boolean data.

pandas/core/arrays/floating.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
import numpy as np
1010

11+
from pandas.util._decorators import set_module
12+
1113
from pandas.core.dtypes.base import register_extension_dtype
1214
from pandas.core.dtypes.common import is_float_dtype
1315

@@ -168,13 +170,15 @@ class FloatingArray(NumericArray):
168170

169171

170172
@register_extension_dtype
173+
@set_module("pandas")
171174
class Float32Dtype(FloatingDtype):
172175
type = np.float32
173176
name: ClassVar[str] = "Float32"
174177
__doc__ = _dtype_docstring.format(dtype="float32")
175178

176179

177180
@register_extension_dtype
181+
@set_module("pandas")
178182
class Float64Dtype(FloatingDtype):
179183
type = np.float64
180184
name: ClassVar[str] = "Float64"

pandas/core/arrays/integer.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
import numpy as np
1010

11+
from pandas.util._decorators import set_module
12+
1113
from pandas.core.dtypes.base import register_extension_dtype
1214
from pandas.core.dtypes.common import is_integer_dtype
1315

@@ -218,55 +220,63 @@ class IntegerArray(NumericArray):
218220

219221

220222
@register_extension_dtype
223+
@set_module("pandas")
221224
class Int8Dtype(IntegerDtype):
222225
type = np.int8
223226
name: ClassVar[str] = "Int8"
224227
__doc__ = _dtype_docstring.format(dtype="int8")
225228

226229

227230
@register_extension_dtype
231+
@set_module("pandas")
228232
class Int16Dtype(IntegerDtype):
229233
type = np.int16
230234
name: ClassVar[str] = "Int16"
231235
__doc__ = _dtype_docstring.format(dtype="int16")
232236

233237

234238
@register_extension_dtype
239+
@set_module("pandas")
235240
class Int32Dtype(IntegerDtype):
236241
type = np.int32
237242
name: ClassVar[str] = "Int32"
238243
__doc__ = _dtype_docstring.format(dtype="int32")
239244

240245

241246
@register_extension_dtype
247+
@set_module("pandas")
242248
class Int64Dtype(IntegerDtype):
243249
type = np.int64
244250
name: ClassVar[str] = "Int64"
245251
__doc__ = _dtype_docstring.format(dtype="int64")
246252

247253

248254
@register_extension_dtype
255+
@set_module("pandas")
249256
class UInt8Dtype(IntegerDtype):
250257
type = np.uint8
251258
name: ClassVar[str] = "UInt8"
252259
__doc__ = _dtype_docstring.format(dtype="uint8")
253260

254261

255262
@register_extension_dtype
263+
@set_module("pandas")
256264
class UInt16Dtype(IntegerDtype):
257265
type = np.uint16
258266
name: ClassVar[str] = "UInt16"
259267
__doc__ = _dtype_docstring.format(dtype="uint16")
260268

261269

262270
@register_extension_dtype
271+
@set_module("pandas")
263272
class UInt32Dtype(IntegerDtype):
264273
type = np.uint32
265274
name: ClassVar[str] = "UInt32"
266275
__doc__ = _dtype_docstring.format(dtype="uint32")
267276

268277

269278
@register_extension_dtype
279+
@set_module("pandas")
270280
class UInt64Dtype(IntegerDtype):
271281
type = np.uint64
272282
name: ClassVar[str] = "UInt64"

pandas/core/dtypes/dtypes.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ class PandasExtensionDtype(ExtensionDtype):
123123
# problem dealing with multiple inheritance from PandasExtensionDtype
124124
# and ExtensionDtype's @properties in the subclasses below. The kind and
125125
# type variables in those subclasses are explicitly typed below.
126-
subdtype = None
126+
subdtype: DtypeObj | None = None
127127
str: str_type
128128
num = 100
129129
shape: tuple[int, ...] = ()
@@ -1604,7 +1604,7 @@ class BaseMaskedDtype(ExtensionDtype):
16041604
Base class for dtypes for BaseMaskedArray subclasses.
16051605
"""
16061606

1607-
base = None
1607+
base: DtypeObj | None = None
16081608
type: type
16091609
_internal_fill_value: Scalar
16101610

pandas/core/missing.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
)
4646

4747
if TYPE_CHECKING:
48+
from collections.abc import Callable
4849
from typing import TypeAlias
4950

5051
from pandas import Index
@@ -548,7 +549,7 @@ def _interpolate_scipy_wrapper(
548549
new_x = np.asarray(new_x)
549550

550551
# ignores some kwargs that could be passed along.
551-
alt_methods = {
552+
alt_methods: dict[str, Callable[..., np.ndarray]] = {
552553
"barycentric": interpolate.barycentric_interpolate,
553554
"krogh": interpolate.krogh_interpolate,
554555
"from_derivatives": _from_derivatives,
@@ -566,6 +567,7 @@ def _interpolate_scipy_wrapper(
566567
"cubic",
567568
"polynomial",
568569
]
570+
terp: Callable[..., np.ndarray] | None
569571
if method in interp1d_methods:
570572
if method == "polynomial":
571573
kind = order

pandas/core/nanops.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,10 @@ def nanmean(
693693
>>> nanops.nanmean(s.values)
694694
np.float64(1.5)
695695
"""
696+
if values.dtype == object and len(values) > 1_000 and mask is None:
697+
# GH#54754 if we are going to fail, try to fail-fast
698+
nanmean(values[:1000], axis=axis, skipna=skipna)
699+
696700
dtype = values.dtype
697701
values, mask = _get_values(values, skipna, fill_value=0, mask=mask)
698702
dtype_sum = _get_dtype_max(dtype)

pandas/core/reshape/reshape.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,11 @@ def __init__(
128128

129129
self.level = self.index._get_level_number(level)
130130

131-
# when index includes `nan`, need to lift levels/strides by 1
132-
self.lift = 1 if -1 in self.index.codes[self.level] else 0
131+
# `nan` values have code `-1`; when sorting, we lift the codes by 1 to place them
132+
# at index 0
133+
self.has_nan = -1 in self.index.codes[self.level]
134+
should_lift = self.has_nan and self.sort
135+
self.lift = 1 if should_lift else 0
133136

134137
# Note: the "pop" below alters these in-place.
135138
self.new_index_levels = list(self.index.levels)
@@ -138,8 +141,16 @@ def __init__(
138141
self.removed_name = self.new_index_names.pop(self.level)
139142
self.removed_level = self.new_index_levels.pop(self.level)
140143
self.removed_level_full = index.levels[self.level]
144+
self.unique_nan_index: int = -1
141145
if not self.sort:
142-
unique_codes = unique(self.index.codes[self.level])
146+
unique_codes: np.ndarray = unique(self.index.codes[self.level])
147+
if self.has_nan:
148+
# drop nan codes, because they are not represented in level
149+
nan_mask = unique_codes == -1
150+
151+
unique_codes = unique_codes[~nan_mask]
152+
self.unique_nan_index = np.flatnonzero(nan_mask)[0]
153+
143154
self.removed_level = self.removed_level.take(unique_codes)
144155
self.removed_level_full = self.removed_level_full.take(unique_codes)
145156

@@ -210,7 +221,7 @@ def _make_selectors(self) -> None:
210221
ngroups = len(obs_ids)
211222

212223
comp_index = ensure_platform_int(comp_index)
213-
stride = self.index.levshape[self.level] + self.lift
224+
stride = self.index.levshape[self.level] + self.has_nan
214225
self.full_shape = ngroups, stride
215226

216227
selector = self.sorted_labels[-1] + stride * comp_index + self.lift
@@ -362,13 +373,13 @@ def get_new_values(self, values, fill_value=None):
362373

363374
def get_new_columns(self, value_columns: Index | None):
364375
if value_columns is None:
365-
if self.lift == 0:
376+
if not self.has_nan:
366377
return self.removed_level._rename(name=self.removed_name)
367378

368379
lev = self.removed_level.insert(0, item=self.removed_level._na_value)
369380
return lev.rename(self.removed_name)
370381

371-
stride = len(self.removed_level) + self.lift
382+
stride = len(self.removed_level) + self.has_nan
372383
width = len(value_columns)
373384
propagator = np.repeat(np.arange(width), stride)
374385

@@ -401,12 +412,21 @@ def _repeater(self) -> np.ndarray:
401412
if len(self.removed_level_full) != len(self.removed_level):
402413
# In this case, we remap the new codes to the original level:
403414
repeater = self.removed_level_full.get_indexer(self.removed_level)
404-
if self.lift:
415+
if self.has_nan:
416+
# insert nan index at first position
405417
repeater = np.insert(repeater, 0, -1)
406418
else:
407419
# Otherwise, we just use each level item exactly once:
408-
stride = len(self.removed_level) + self.lift
420+
stride = len(self.removed_level) + self.has_nan
409421
repeater = np.arange(stride) - self.lift
422+
if self.has_nan and not self.sort:
423+
assert self.unique_nan_index > -1, (
424+
"`unique_nan_index` not properly initialized"
425+
)
426+
# assign -1 at the position where nan appears among the unique values.
427+
repeater[self.unique_nan_index] = -1
428+
# compensate for the removed index level
429+
repeater[self.unique_nan_index + 1 :] -= 1
410430

411431
return repeater
412432

0 commit comments

Comments
 (0)