Skip to content

Commit c2c37fc

Browse files
committed
API: to_datetime strings default to microsecond
1 parent 81f8d5d commit c2c37fc

File tree

18 files changed: 176 additions & 143 deletions

pandas/_libs/tslibs/conversion.pyx

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -623,6 +623,8 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
623623
)
624624
if not string_to_dts_failed:
625625
reso = get_supported_reso(out_bestunit)
626+
if reso < NPY_FR_us:
627+
reso = NPY_FR_us
626628
check_dts_bounds(&dts, reso)
627629
obj = _TSObject()
628630
obj.dts = dts
@@ -661,6 +663,8 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
661663
nanos=&nanos,
662664
)
663665
reso = get_supported_reso(out_bestunit)
666+
if reso < NPY_FR_us:
667+
reso = NPY_FR_us
664668
return convert_datetime_to_tsobject(dt, tz, nanos=nanos, reso=reso)
665669

666670

pandas/_libs/tslibs/strptime.pyx

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -466,6 +466,8 @@ def array_strptime(
466466
# No error reported by string_to_dts, pick back up
467467
# where we left off
468468
item_reso = get_supported_reso(out_bestunit)
469+
if item_reso < NPY_DATETIMEUNIT.NPY_FR_us:
470+
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
469471
state.update_creso(item_reso)
470472
if infer_reso:
471473
creso = state.creso
@@ -510,6 +512,8 @@ def array_strptime(
510512
val, fmt, exact, format_regex, locale_time, &dts, &item_reso
511513
)
512514

515+
if item_reso < NPY_DATETIMEUNIT.NPY_FR_us:
516+
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
513517
state.update_creso(item_reso)
514518
if infer_reso:
515519
creso = state.creso

pandas/tests/base/test_conversion.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -445,9 +445,9 @@ def test_to_numpy_dtype(as_series):
445445
[
446446
([1, 2, None], "float64", 0, [1.0, 2.0, 0.0]),
447447
(
448-
[Timestamp("2000"), Timestamp("2000"), pd.NaT],
448+
[Timestamp("2000").as_unit("s"), Timestamp("2000").as_unit("s"), pd.NaT],
449449
None,
450-
Timestamp("2000"),
450+
Timestamp("2000").as_unit("s"),
451451
[np.datetime64("2000-01-01T00:00:00", "s")] * 3,
452452
),
453453
],
@@ -486,10 +486,14 @@ def test_to_numpy_na_value_numpy_dtype(
486486
[1, 2, 0, 4],
487487
),
488488
(
489-
[Timestamp("2000"), Timestamp("2000"), pd.NaT],
490-
[(0, Timestamp("2021")), (0, Timestamp("2022")), (1, Timestamp("2000"))],
489+
[Timestamp("2000").as_unit("s"), Timestamp("2000").as_unit("s"), pd.NaT],
490+
[
491+
(0, Timestamp("2021").as_unit("s")),
492+
(0, Timestamp("2022").as_unit("s")),
493+
(1, Timestamp("2000").as_unit("s")),
494+
],
491495
None,
492-
Timestamp("2000"),
496+
Timestamp("2000").as_unit("s"),
493497
[np.datetime64("2000-01-01T00:00:00", "s")] * 3,
494498
),
495499
],

pandas/tests/dtypes/cast/test_infer_dtype.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -155,8 +155,8 @@ def test_infer_dtype_from_scalar_errors():
155155
(1, np.int64),
156156
(1.5, np.float64),
157157
(np.datetime64("2016-01-01"), np.dtype("M8[s]")),
158-
(Timestamp("20160101"), np.dtype("M8[s]")),
159-
(Timestamp("20160101", tz="UTC"), "datetime64[s, UTC]"),
158+
(Timestamp("20160101").as_unit("s"), np.dtype("M8[s]")),
159+
(Timestamp("20160101", tz="UTC").as_unit("s"), "datetime64[s, UTC]"),
160160
],
161161
)
162162
def test_infer_dtype_from_scalar(value, expected, using_infer_string):

pandas/tests/frame/indexing/test_indexing.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -820,7 +820,7 @@ def test_setitem_single_column_mixed_datetime(self):
820820
columns=["foo", "bar", "baz"],
821821
)
822822

823-
df["timestamp"] = Timestamp("20010102")
823+
df["timestamp"] = Timestamp("20010102").as_unit("s")
824824

825825
# check our dtypes
826826
result = df.dtypes

pandas/tests/frame/indexing/test_setitem.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -259,7 +259,7 @@ def test_setitem_dict_preserves_dtypes(self):
259259
(Period("2020-01"), PeriodDtype("M")),
260260
(Interval(left=0, right=5), IntervalDtype("int64", "right")),
261261
(
262-
Timestamp("2011-01-01", tz="US/Eastern"),
262+
Timestamp("2011-01-01", tz="US/Eastern").as_unit("s"),
263263
DatetimeTZDtype(unit="s", tz="US/Eastern"),
264264
),
265265
],

pandas/tests/frame/methods/test_reindex.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -140,7 +140,7 @@ def test_reindex_tzaware_fill_value(self):
140140
# GH#52586
141141
df = DataFrame([[1]])
142142

143-
ts = pd.Timestamp("2023-04-10 17:32", tz="US/Pacific")
143+
ts = pd.Timestamp("2023-04-10 17:32", tz="US/Pacific").as_unit("s")
144144
res = df.reindex([0, 1], axis=1, fill_value=ts)
145145
assert res.dtypes[1] == pd.DatetimeTZDtype(unit="s", tz="US/Pacific")
146146
expected = DataFrame({0: [1], 1: [ts]})

pandas/tests/indexing/test_coercion.py

Lines changed: 54 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -241,14 +241,21 @@ def test_insert_float_index(
241241
@pytest.mark.parametrize(
242242
"fill_val,exp_dtype",
243243
[
244-
(pd.Timestamp("2012-01-01"), "datetime64[ns]"),
245-
(pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
244+
(pd.Timestamp("2012-01-01").as_unit("s"), "datetime64[ns]"),
245+
(
246+
pd.Timestamp("2012-01-01", tz="US/Eastern").as_unit("s"),
247+
"datetime64[ns, US/Eastern]",
248+
),
246249
],
247250
ids=["datetime64", "datetime64tz"],
248251
)
249252
@pytest.mark.parametrize(
250253
"insert_value",
251-
[pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1],
254+
[
255+
pd.Timestamp("2012-01-01").as_unit("s"),
256+
pd.Timestamp("2012-01-01", tz="Asia/Tokyo").as_unit("s"),
257+
1,
258+
],
252259
)
253260
def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value):
254261
obj = pd.DatetimeIndex(
@@ -264,13 +271,13 @@ def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value):
264271

265272
if fill_val.tz:
266273
# mismatched tzawareness
267-
ts = pd.Timestamp("2012-01-01")
274+
ts = pd.Timestamp("2012-01-01").as_unit("s")
268275
result = obj.insert(1, ts)
269276
expected = obj.astype(object).insert(1, ts)
270277
assert expected.dtype == object
271278
tm.assert_index_equal(result, expected)
272279

273-
ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
280+
ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo").as_unit("s")
274281
result = obj.insert(1, ts)
275282
# once deprecation is enforced:
276283
expected = obj.insert(1, ts.tz_convert(obj.dtype.tz))
@@ -279,7 +286,7 @@ def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value):
279286

280287
else:
281288
# mismatched tzawareness
282-
ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
289+
ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo").as_unit("s")
283290
result = obj.insert(1, ts)
284291
expected = obj.astype(object).insert(1, ts)
285292
assert expected.dtype == object
@@ -302,7 +309,7 @@ def test_insert_index_timedelta64(self):
302309
obj, pd.Timedelta("10 day"), exp, "timedelta64[ns]"
303310
)
304311

305-
for item in [pd.Timestamp("2012-01-01"), 1]:
312+
for item in [pd.Timestamp("2012-01-01").as_unit("s"), 1]:
306313
result = obj.insert(1, item)
307314
expected = obj.astype(object).insert(1, item)
308315
assert expected.dtype == object
@@ -312,7 +319,11 @@ def test_insert_index_timedelta64(self):
312319
"insert, coerced_val, coerced_dtype",
313320
[
314321
(pd.Period("2012-01", freq="M"), "2012-01", "period[M]"),
315-
(pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), object),
322+
(
323+
pd.Timestamp("2012-01-01").as_unit("s"),
324+
pd.Timestamp("2012-01-01").as_unit("s"),
325+
object,
326+
),
316327
(1, 1, object),
317328
("x", "x", object),
318329
],
@@ -451,8 +462,8 @@ def test_where_series_bool(self, index_or_series, fill_val, exp_dtype):
451462
@pytest.mark.parametrize(
452463
"fill_val,exp_dtype",
453464
[
454-
(pd.Timestamp("2012-01-01"), "datetime64[ns]"),
455-
(pd.Timestamp("2012-01-01", tz="US/Eastern"), object),
465+
(pd.Timestamp("2012-01-01").as_unit("s"), "datetime64[ns]"),
466+
(pd.Timestamp("2012-01-01", tz="US/Eastern").as_unit("s"), object),
456467
],
457468
ids=["datetime64", "datetime64tz"],
458469
)
@@ -596,8 +607,8 @@ def test_fillna_complex128(self, index_or_series, fill_val, fill_dtype):
596607
@pytest.mark.parametrize(
597608
"fill_val,fill_dtype",
598609
[
599-
(pd.Timestamp("2012-01-01"), "datetime64[s]"),
600-
(pd.Timestamp("2012-01-01", tz="US/Eastern"), object),
610+
(pd.Timestamp("2012-01-01").as_unit("s"), "datetime64[s]"),
611+
(pd.Timestamp("2012-01-01", tz="US/Eastern").as_unit("s"), object),
601612
(1, object),
602613
("x", object),
603614
],
@@ -607,31 +618,37 @@ def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype):
607618
klass = index_or_series
608619
obj = klass(
609620
[
610-
pd.Timestamp("2011-01-01"),
621+
pd.Timestamp("2011-01-01").as_unit("s"),
611622
pd.NaT,
612-
pd.Timestamp("2011-01-03"),
613-
pd.Timestamp("2011-01-04"),
623+
pd.Timestamp("2011-01-03").as_unit("s"),
624+
pd.Timestamp("2011-01-04").as_unit("s"),
614625
]
615626
)
616627
assert obj.dtype == "datetime64[s]"
617628

618629
exp = klass(
619630
[
620-
pd.Timestamp("2011-01-01"),
631+
pd.Timestamp("2011-01-01").as_unit("s"),
621632
fill_val,
622-
pd.Timestamp("2011-01-03"),
623-
pd.Timestamp("2011-01-04"),
633+
pd.Timestamp("2011-01-03").as_unit("s"),
634+
pd.Timestamp("2011-01-04").as_unit("s"),
624635
]
625636
)
626637
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
627638

628639
@pytest.mark.parametrize(
629640
"fill_val,fill_dtype",
630641
[
631-
(pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[s, US/Eastern]"),
632-
(pd.Timestamp("2012-01-01"), object),
642+
(
643+
pd.Timestamp("2012-01-01", tz="US/Eastern").as_unit("s"),
644+
"datetime64[s, US/Eastern]",
645+
),
646+
(pd.Timestamp("2012-01-01").as_unit("s"), object),
633647
# pre-2.0 with a mismatched tz we would get object result
634-
(pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), "datetime64[s, US/Eastern]"),
648+
(
649+
pd.Timestamp("2012-01-01", tz="Asia/Tokyo").as_unit("s"),
650+
"datetime64[s, US/Eastern]",
651+
),
635652
(1, object),
636653
("x", object),
637654
],
@@ -642,10 +659,10 @@ def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype):
642659

643660
obj = klass(
644661
[
645-
pd.Timestamp("2011-01-01", tz=tz),
662+
pd.Timestamp("2011-01-01", tz=tz).as_unit("s"),
646663
pd.NaT,
647-
pd.Timestamp("2011-01-03", tz=tz),
648-
pd.Timestamp("2011-01-04", tz=tz),
664+
pd.Timestamp("2011-01-03", tz=tz).as_unit("s"),
665+
pd.Timestamp("2011-01-04", tz=tz).as_unit("s"),
649666
]
650667
)
651668
assert obj.dtype == "datetime64[s, US/Eastern]"
@@ -656,10 +673,10 @@ def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype):
656673
fv = fill_val.tz_convert(tz)
657674
exp = klass(
658675
[
659-
pd.Timestamp("2011-01-01", tz=tz),
676+
pd.Timestamp("2011-01-01", tz=tz).as_unit("s"),
660677
fv,
661-
pd.Timestamp("2011-01-03", tz=tz),
662-
pd.Timestamp("2011-01-04", tz=tz),
678+
pd.Timestamp("2011-01-03", tz=tz).as_unit("s"),
679+
pd.Timestamp("2011-01-04", tz=tz).as_unit("s"),
663680
]
664681
)
665682
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
@@ -672,8 +689,8 @@ def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype):
672689
1 + 1j,
673690
True,
674691
pd.Interval(1, 2, closed="left"),
675-
pd.Timestamp("2012-01-01", tz="US/Eastern"),
676-
pd.Timestamp("2012-01-01"),
692+
pd.Timestamp("2012-01-01", tz="US/Eastern").as_unit("s"),
693+
pd.Timestamp("2012-01-01").as_unit("s"),
677694
pd.Timedelta(days=1),
678695
pd.Period("2016-01-01", "D"),
679696
],
@@ -716,8 +733,8 @@ def test_fillna_series_timedelta64(self):
716733
1 + 1j,
717734
True,
718735
pd.Interval(1, 2, closed="left"),
719-
pd.Timestamp("2012-01-01", tz="US/Eastern"),
720-
pd.Timestamp("2012-01-01"),
736+
pd.Timestamp("2012-01-01", tz="US/Eastern").as_unit("s"),
737+
pd.Timestamp("2012-01-01").as_unit("s"),
721738
pd.Timedelta(days=1),
722739
pd.Period("2016-01-01", "W"),
723740
],
@@ -751,14 +768,17 @@ class TestReplaceSeriesCoercion(CoercionBase):
751768
rep["float64"] = [1.1, 2.2]
752769
rep["complex128"] = [1 + 1j, 2 + 2j]
753770
rep["bool"] = [True, False]
754-
rep["datetime64[ns]"] = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-03")]
771+
rep["datetime64[ns]"] = [
772+
pd.Timestamp("2011-01-01").as_unit("s"),
773+
pd.Timestamp("2011-01-03").as_unit("s"),
774+
]
755775

756776
for tz in ["UTC", "US/Eastern"]:
757777
# to test tz => different tz replacement
758778
key = f"datetime64[ns, {tz}]"
759779
rep[key] = [
760-
pd.Timestamp("2011-01-01", tz=tz),
761-
pd.Timestamp("2011-01-03", tz=tz),
780+
pd.Timestamp("2011-01-01", tz=tz).as_unit("s"),
781+
pd.Timestamp("2011-01-03", tz=tz).as_unit("s"),
762782
]
763783

764784
rep["timedelta64[ns]"] = [pd.Timedelta("1 day"), pd.Timedelta("2 day")]

pandas/tests/io/excel/test_readers.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -136,7 +136,9 @@ def df_ref(datapath):
136136

137137
def get_exp_unit(read_ext: str, engine: str | None) -> str:
138138
unit = "us"
139-
if (read_ext == ".ods") ^ (engine == "calamine"):
139+
if read_ext == ".ods" and engine == "odf":
140+
pass
141+
elif (read_ext == ".ods") ^ (engine == "calamine"):
140142
unit = "s"
141143
return unit
142144

pandas/tests/io/excel/test_writers.py

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -38,8 +38,6 @@
3838

3939

4040
def get_exp_unit(path: str) -> str:
41-
if path.endswith(".ods"):
42-
return "s"
4341
return "us"
4442

4543

@@ -297,13 +295,13 @@ def test_read_excel_parse_dates(self, tmp_excel):
297295

298296
res = pd.read_excel(tmp_excel, parse_dates=["date_strings"], index_col=0)
299297
expected = df[:]
300-
expected["date_strings"] = expected["date_strings"].astype("M8[s]")
298+
expected["date_strings"] = expected["date_strings"].astype("M8[us]")
301299
tm.assert_frame_equal(res, expected)
302300

303301
res = pd.read_excel(
304302
tmp_excel, parse_dates=["date_strings"], date_format="%m/%d/%Y", index_col=0
305303
)
306-
expected["date_strings"] = expected["date_strings"].astype("M8[s]")
304+
expected["date_strings"] = expected["date_strings"].astype("M8[us]")
307305
tm.assert_frame_equal(expected, res)
308306

309307
def test_multiindex_interval_datetimes(self, tmp_excel):
@@ -356,17 +354,15 @@ def test_excel_round_trip_with_periodindex(self, tmp_excel, merge_cells):
356354
MultiIndex.from_arrays(
357355
[
358356
[
359-
pd.to_datetime("2006-10-06 00:00:00"),
360-
pd.to_datetime("2006-10-07 00:00:00"),
357+
pd.to_datetime("2006-10-06 00:00:00").as_unit("s"),
358+
pd.to_datetime("2006-10-07 00:00:00").as_unit("s"),
361359
],
362360
["X", "Y"],
363361
],
364362
names=["date", "category"],
365363
),
366364
)
367-
time_format = (
368-
"datetime64[s]" if tmp_excel.endswith(".ods") else "datetime64[us]"
369-
)
365+
time_format = "datetime64[us]"
370366
expected.index = expected.index.set_levels(
371367
expected.index.levels[0].astype(time_format), level=0
372368
)

0 commit comments

Comments
 (0)