Skip to content

Commit b13ac0f

Browse files
authored
Merge branch 'main' into sty-b905
2 parents 7b793d5 + 3157d07 commit b13ac0f

File tree

10 files changed

+230
-86
lines changed

10 files changed

+230
-86
lines changed

doc/source/whatsnew/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 2.3
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v2.3.4
2728
v2.3.3
2829
v2.3.2
2930
v2.3.1

doc/source/whatsnew/v2.3.4.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
.. _whatsnew_234:
2+
3+
What's new in 2.3.4 (November XX, 2025)
4+
----------------------------------------
5+
6+
These are the changes in pandas 2.3.4. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
13+
Bug fixes
14+
^^^^^^^^^
15+
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
16+
17+
.. ---------------------------------------------------------------------------
18+
.. _whatsnew_234.contributors:
19+
20+
Contributors
21+
~~~~~~~~~~~~

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1132,7 +1132,6 @@ Interval
11321132

11331133
Indexing
11341134
^^^^^^^^
1135-
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
11361135
- Bug in :meth:`DataFrame.__getitem__` when slicing a :class:`DataFrame` with many rows raised an ``OverflowError`` (:issue:`59531`)
11371136
- Bug in :meth:`DataFrame.__setitem__` on an empty :class:`DataFrame` with a tuple corrupting the frame (:issue:`54385`)
11381137
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
@@ -1260,6 +1259,7 @@ Groupby/resample/rolling
12601259
- Bug in :meth:`Rolling.skew` incorrectly computing skewness for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows (:issue:`47461`)
12611260
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time (:issue:`58380`)
12621261
- Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`)
1262+
- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` computing incorrect results due to numerical instability (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)
12631263

12641264
Reshaping
12651265
^^^^^^^^^

pandas/_libs/lib.pyx

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2716,18 +2716,16 @@ def maybe_convert_objects(ndarray[object] objects,
27162716
if convert_non_numeric:
27172717
if getattr(val, "tzinfo", None) is not None:
27182718
seen.datetimetz_ = True
2719-
break
27202719
else:
27212720
seen.datetime_ = True
27222721
try:
27232722
convert_to_tsobject(val, None, None, 0, 0)
27242723
except OutOfBoundsDatetime:
27252724
# e.g. test_out_of_s_bounds_datetime64
27262725
seen.object_ = True
2727-
break
27282726
else:
27292727
seen.object_ = True
2730-
break
2728+
break
27312729
elif is_period_object(val):
27322730
if convert_non_numeric:
27332731
seen.period_ = True

pandas/_libs/tslibs/offsets.pyx

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5202,6 +5202,32 @@ deprec_to_valid_alias = {
52025202
"L": "ms",
52035203
"U": "us",
52045204
"N": "ns",
5205+
"AS": "YS",
5206+
"AS-JAN": "YS-JAN",
5207+
"AS-FEB": "YS-FEB",
5208+
"AS-MAR": "YS-MAR",
5209+
"AS-APR": "YS-APR",
5210+
"AS-MAY": "YS-MAY",
5211+
"AS-JUN": "YS-JUN",
5212+
"AS-JUL": "YS-JUL",
5213+
"AS-AUG": "YS-AUG",
5214+
"AS-SEP": "YS-SEP",
5215+
"AS-OCT": "YS-OCT",
5216+
"AS-NOV": "YS-NOV",
5217+
"AS-DEC": "YS-DEC",
5218+
"A": "Y",
5219+
"A-JAN": "Y-JAN",
5220+
"A-FEB": "Y-FEB",
5221+
"A-MAR": "Y-MAR",
5222+
"A-APR": "Y-APR",
5223+
"A-MAY": "Y-MAY",
5224+
"A-JUN": "Y-JUN",
5225+
"A-JUL": "Y-JUL",
5226+
"A-AUG": "Y-AUG",
5227+
"A-SEP": "Y-SEP",
5228+
"A-OCT": "Y-OCT",
5229+
"A-NOV": "Y-NOV",
5230+
"A-DEC": "Y-DEC",
52055231
}
52065232

52075233

pandas/_libs/window/aggregations.pyx

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -332,19 +332,13 @@ cdef float64_t calc_var(
332332
int ddof,
333333
float64_t nobs,
334334
float64_t ssqdm_x,
335-
int64_t num_consecutive_same_value
336335
) noexcept nogil:
337336
cdef:
338337
float64_t result
339338

340339
# Variance is unchanged if no observation is added or removed
341340
if (nobs >= minp) and (nobs > ddof):
342-
343-
# pathological case & repeatedly same values case
344-
if nobs == 1 or num_consecutive_same_value >= nobs:
345-
result = 0
346-
else:
347-
result = ssqdm_x / (nobs - <float64_t>ddof)
341+
result = ssqdm_x / (nobs - <float64_t>ddof)
348342
else:
349343
result = NaN
350344

@@ -357,27 +351,19 @@ cdef void add_var(
357351
float64_t *mean_x,
358352
float64_t *ssqdm_x,
359353
float64_t *compensation,
360-
int64_t *num_consecutive_same_value,
361-
float64_t *prev_value,
354+
bint *numerically_unstable,
362355
) noexcept nogil:
363356
""" add a value to the var calc """
364357
cdef:
365358
float64_t delta, prev_mean, y, t
359+
float64_t prev_m2 = ssqdm_x[0]
366360

367361
# GH#21813, if msvc 2017 bug is resolved, we should be OK with != instead of `isnan`
368362
if val != val:
369363
return
370364

371365
nobs[0] = nobs[0] + 1
372366

373-
# GH#42064, record num of same values to remove floating point artifacts
374-
if val == prev_value[0]:
375-
num_consecutive_same_value[0] += 1
376-
else:
377-
# reset to 1 (include current value itself)
378-
num_consecutive_same_value[0] = 1
379-
prev_value[0] = val
380-
381367
# Welford's method for the online variance-calculation
382368
# using Kahan summation
383369
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
@@ -392,17 +378,23 @@ cdef void add_var(
392378
mean_x[0] = 0
393379
ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0])
394380

381+
if prev_m2 * InvCondTol > ssqdm_x[0]:
382+
# possible catastrophic cancellation
383+
numerically_unstable[0] = True
384+
395385

396386
cdef void remove_var(
397387
float64_t val,
398388
float64_t *nobs,
399389
float64_t *mean_x,
400390
float64_t *ssqdm_x,
401-
float64_t *compensation
391+
float64_t *compensation,
392+
bint *numerically_unstable,
402393
) noexcept nogil:
403394
""" remove a value from the var calc """
404395
cdef:
405396
float64_t delta, prev_mean, y, t
397+
float64_t prev_m2 = ssqdm_x[0]
406398
if val == val:
407399
nobs[0] = nobs[0] - 1
408400
if nobs[0]:
@@ -416,9 +408,14 @@ cdef void remove_var(
416408
delta = t
417409
mean_x[0] = mean_x[0] - delta / nobs[0]
418410
ssqdm_x[0] = ssqdm_x[0] - (val - prev_mean) * (val - mean_x[0])
411+
412+
if prev_m2 * InvCondTol > ssqdm_x[0]:
413+
# possible catastrophic cancellation
414+
numerically_unstable[0] = True
419415
else:
420416
mean_x[0] = 0
421417
ssqdm_x[0] = 0
418+
numerically_unstable[0] = False
422419

423420

424421
def roll_var(const float64_t[:] values, ndarray[int64_t] start,
@@ -428,11 +425,12 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
428425
"""
429426
cdef:
430427
float64_t mean_x, ssqdm_x, nobs, compensation_add,
431-
float64_t compensation_remove, prev_value
432-
int64_t s, e, num_consecutive_same_value
428+
float64_t compensation_remove
429+
int64_t s, e
433430
Py_ssize_t i, j, N = len(start)
434431
ndarray[float64_t] output
435432
bint is_monotonic_increasing_bounds
433+
bint requires_recompute, numerically_unstable
436434

437435
minp = max(minp, 1)
438436
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
@@ -449,32 +447,35 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
449447

450448
# Over the first window, observations can only be added
451449
# never removed
452-
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
453-
454-
prev_value = values[s]
455-
num_consecutive_same_value = 0
456-
457-
mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0
458-
for j in range(s, e):
459-
add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add,
460-
&num_consecutive_same_value, &prev_value)
461-
462-
else:
450+
requires_recompute = (
451+
i == 0
452+
or not is_monotonic_increasing_bounds
453+
or s >= end[i - 1]
454+
)
463455

456+
if not requires_recompute:
464457
# After the first window, observations can both be added
465458
# and removed
466459

467460
# calculate deletes
468461
for j in range(start[i - 1], s):
469462
remove_var(values[j], &nobs, &mean_x, &ssqdm_x,
470-
&compensation_remove)
463+
&compensation_remove, &numerically_unstable)
471464

472465
# calculate adds
473466
for j in range(end[i - 1], e):
474467
add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add,
475-
&num_consecutive_same_value, &prev_value)
468+
&numerically_unstable)
469+
470+
if requires_recompute or numerically_unstable:
471+
472+
mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0
473+
for j in range(s, e):
474+
add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add,
475+
&numerically_unstable)
476+
numerically_unstable = False
476477

477-
output[i] = calc_var(minp, ddof, nobs, ssqdm_x, num_consecutive_same_value)
478+
output[i] = calc_var(minp, ddof, nobs, ssqdm_x)
478479

479480
if not is_monotonic_increasing_bounds:
480481
nobs = 0.0

pandas/tests/indexes/datetimes/test_date_range.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,22 @@ def test_date_range_depr_lowercase_frequency(self, freq, freq_depr):
814814
result = date_range("1/1/2000", periods=4, freq=freq_depr)
815815
tm.assert_index_equal(result, expected)
816816

817+
@pytest.mark.parametrize(
818+
"freq_removed,freq",
819+
[
820+
("100A", "Y"),
821+
("2A-DEC", "Y-DEC"),
822+
("100AS", "YS"),
823+
("2AS-MAY", "YS-MAY"),
824+
],
825+
)
826+
def test_error_message_for_removed_year_yearbegin_frequencies(
827+
self, freq, freq_removed
828+
):
829+
msg = f"Did you mean {freq}"
830+
with pytest.raises(ValueError, match=msg):
831+
date_range("1/1/2000", periods=2, freq=freq_removed)
832+
817833

818834
class TestDateRangeTZ:
819835
"""Tests for date_range with timezones"""

pandas/tests/io/pytables/test_categorical.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,22 @@
44
from pandas import (
55
Categorical,
66
DataFrame,
7+
HDFStore,
78
Series,
89
_testing as tm,
910
concat,
1011
read_hdf,
1112
)
1213
from pandas.tests.io.pytables.common import (
1314
_maybe_remove,
14-
ensure_clean_store,
1515
)
1616

1717
pytestmark = [pytest.mark.single_cpu]
1818

1919

20-
def test_categorical(setup_path):
21-
with ensure_clean_store(setup_path) as store:
20+
def test_categorical(tmp_path):
21+
path = tmp_path / "test_categorical.h5"
22+
with HDFStore(path) as store:
2223
# Basic
2324
_maybe_remove(store, "s")
2425
s = Series(

0 commit comments

Comments
 (0)