Skip to content

Commit b62d687

Browse files
DEPR: Deprecate non-ISO date string formats in DatetimeIndex.loc
1 parent 9f66b81 commit b62d687

File tree

3 files changed

+146
-1
lines changed

3 files changed

+146
-1
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -739,13 +739,13 @@ Other Deprecations
739739
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`)
740740
- Deprecated allowing strings representing full dates in :meth:`DataFrame.at_time` and :meth:`Series.at_time` (:issue:`50839`)
741741
- Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching "str" dtype when ``np.object_`` is specified (:issue:`61916`)
742+
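- Deprecated parsing of non-ISO date strings (e.g. ``"01/10/2024"``) when indexing or slicing a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` by string label (e.g. via ``.loc``); use ISO format (``YYYY-MM-DD``) instead (:issue:`58302`)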
742743
- Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`)
743744
- Deprecated passing non-Index types to :meth:`Index.join`; explicitly convert to Index first (:issue:`62897`)
744745
- Deprecated silent casting of non-datetime 'other' to datetime in :meth:`Series.combine_first` (:issue:`62931`)
745746
- Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`)
746747
- Deprecated support for the Dataframe Interchange Protocol (:issue:`56732`)
747748
- Deprecated the 'inplace' keyword from :meth:`Resampler.interpolate`, as passing ``True`` raises ``AttributeError`` (:issue:`58690`)
748-
749749
.. ---------------------------------------------------------------------------
750750
.. _whatsnew_300.prior_deprecations:
751751

pandas/core/indexes/datetimes.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import datetime as dt
44
import operator
5+
import re
56
from typing import (
67
TYPE_CHECKING,
78
Self,
@@ -110,6 +111,28 @@ def _new_DatetimeIndex(cls, d):
110111
return result
111112

112113

114+
def _is_iso_format_string(date_str: str) -> bool:
115+
"""
116+
Check if a date string follows ISO8601 format.
117+
118+
ISO format must start with a 4-digit year (YYYY), optionally followed by
119+
month and day with consistent separators.
120+
121+
Examples of ISO format (True):
122+
- 2024-01-10
123+
- 2024/01/10
124+
- 2024 01 10
125+
- 2024-01-10T00:00:00
126+
127+
Examples of non-ISO format (False):
128+
- 01/10/2024 (MM/DD/YYYY)
129+
- 10/01/2024 (DD/MM/YYYY)
130+
- 01-10-2024 (MM-DD-YYYY)
131+
"""
132+
# ISO format must start with 4-digit year followed by separator (-, /, ., or space)
133+
return re.match(r"^\d{4}[-/. ]", date_str) is not None
134+
135+
113136
@inherit_names(
114137
DatetimeArray._field_ops
115138
+ [
@@ -566,6 +589,15 @@ def _parsed_string_to_bounds(
566589
return start, end
567590

568591
def _parse_with_reso(self, label: str) -> tuple[Timestamp, Resolution]:
592+
# GH#58302 - Deprecate non-ISO string formats in .loc indexing
593+
if isinstance(label, str) and not _is_iso_format_string(label):
594+
msg = (
595+
"Parsing non-ISO datetime strings in .loc is deprecated and will be "
596+
"removed in a future version. Use ISO format (YYYY-MM-DD) instead. "
597+
f"Got '{label}'."
598+
)
599+
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
600+
569601
parsed, reso = super()._parse_with_reso(label)
570602

571603
parsed = Timestamp(parsed)
@@ -701,11 +733,29 @@ def check_str_or_none(point) -> bool:
701733
mask = np.array(True)
702734
in_index = True
703735
if start is not None:
736+
# GH#58302 - Deprecate non-ISO string formats in .loc indexing
737+
if isinstance(start, str) and not _is_iso_format_string(start):
738+
msg = (
739+
"Parsing non-ISO datetime strings in .loc is deprecated "
740+
"and will be removed in a future version. Use ISO format "
741+
f"(YYYY-MM-DD) instead. Got '{start}'."
742+
)
743+
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
744+
704745
start_casted = self._maybe_cast_slice_bound(start, "left")
705746
mask = start_casted <= self
706747
in_index &= (start_casted == self).any()
707748

708749
if end is not None:
750+
# GH#58302 - Deprecate non-ISO string formats in .loc indexing
751+
if isinstance(end, str) and not _is_iso_format_string(end):
752+
msg = (
753+
"Parsing non-ISO datetime strings in .loc is deprecated "
754+
"and will be removed in a future version. Use ISO format "
755+
f"(YYYY-MM-DD) instead. Got '{end}'."
756+
)
757+
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
758+
709759
end_casted = self._maybe_cast_slice_bound(end, "right")
710760
mask = (self <= end_casted) & mask
711761
in_index &= (end_casted == self).any()

pandas/tests/indexes/datetimes/test_partial_slicing.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import numpy as np
66
import pytest
77

8+
from pandas.errors import Pandas4Warning
9+
810
from pandas import (
911
DataFrame,
1012
DatetimeIndex,
@@ -464,3 +466,96 @@ def test_slice_reduce_to_series(self):
464466
)
465467
result = df.loc["2000", "A"]
466468
tm.assert_series_equal(result, expected)
469+
470+
471+
class TestDatetimeIndexNonISODeprecation:
    """
    Deprecation of non-ISO (year-last) date strings used as ``.loc`` labels.

    GH#58302
    """

    @pytest.fixture
    def ser_daily(self):
        """Series holding 0..14, indexed by 15 daily stamps from 2024-01-01."""
        daily_index = DatetimeIndex(
            date_range(start="2024-01-01", freq="D", periods=15)
        )
        return Series(range(15), index=daily_index)

    # Slash-separated, year-last keys; with and without a leading zero.
    @pytest.mark.parametrize("date_string", ["1/10/2024", "01/10/2024"])
    def test_loc_indexing_non_iso_single_key_deprecation(self, ser_daily, date_string):
        # GH#58302 - a year-last scalar key warns but still resolves
        expected_msg = "Parsing non-ISO datetime strings in .loc is deprecated"

        with tm.assert_produces_warning(Pandas4Warning, match=expected_msg):
            value = ser_daily.loc[date_string]

        assert value == 9

    # Year-first keys using dash, slash and space separators.
    @pytest.mark.parametrize(
        ("date_string", "expected"),
        [
            ("2024-01-10", 9),
            ("2024/01/10", 9),
            ("2024 01 10", 9),
        ],
    )
    def test_loc_indexing_iso_format_no_warning(self, ser_daily, date_string, expected):
        # GH#58302 - year-first keys must stay silent
        with tm.assert_produces_warning(None):
            value = ser_daily.loc[date_string]

        assert value == expected

    # Slash-separated, year-last lower bounds; with and without a leading zero.
    @pytest.mark.parametrize("start_string", ["1/10/2024", "01/10/2024"])
    def test_loc_slicing_non_iso_start_deprecation(self, ser_daily, start_string):
        # GH#58302 - a year-last lower bound in a slice warns
        expected_msg = "Parsing non-ISO datetime strings in .loc is deprecated"
        bounds = slice(start_string, "2024-01-15")

        with tm.assert_produces_warning(Pandas4Warning, match=expected_msg):
            selected = ser_daily.loc[bounds]

        assert len(selected) > 0

    # Dash-separated, year-last upper bounds (day/month order is ambiguous);
    # with and without a leading zero.
    @pytest.mark.parametrize("end_string", ["5-01-2024", "05-01-2024"])
    def test_loc_slicing_non_iso_end_deprecation(self, ser_daily, end_string):
        # GH#58302 - a year-last upper bound in a slice warns
        expected_msg = "Parsing non-ISO datetime strings in .loc is deprecated"
        bounds = slice("2024-01-01", end_string)

        with tm.assert_produces_warning(Pandas4Warning, match=expected_msg):
            selected = ser_daily.loc[bounds]

        assert len(selected) > 0

    def test_loc_slicing_both_non_iso_deprecation(self, ser_daily):
        # GH#58302 - year-last strings on both bounds should warn (twice)
        expected_msg = "Parsing non-ISO datetime strings in .loc is deprecated"
        bounds = slice("1/10/2024", "5-01-2024")

        with tm.assert_produces_warning(
            Pandas4Warning, match=expected_msg, check_stacklevel=False
        ):
            selected = ser_daily.loc[bounds]

        assert len(selected) > 0

    def test_loc_slicing_iso_formats_no_warning(self, ser_daily):
        # GH#58302 - year-first slice bounds must stay silent
        bounds = slice("2024-01-05", "2024-01-10")

        with tm.assert_produces_warning(None):
            selected = ser_daily.loc[bounds]

        # Both endpoints are included: Jan 5 through Jan 10.
        assert len(selected) == 6

    def test_loc_non_string_keys_no_warning(self, ser_daily):
        # GH#58302 - non-string keys are unaffected by the deprecation
        key = Timestamp("2024-01-10")

        with tm.assert_produces_warning(None):
            value = ser_daily.loc[key]

        assert value == 9

0 commit comments

Comments
 (0)