diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 72b40982abb0c..9674cb383c1e9 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -241,6 +241,19 @@ inferred frequency upon creation: pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer") +In most cases, parsing strings to datetimes (with any of :func:`to_datetime`, :class:`DatetimeIndex`, or :class:`Timestamp`) will produce objects with microsecond ("us") unit. The exception to this rule is when your strings have nanosecond precision, in which case the result will have "ns" unit: + +.. ipython:: python + + pd.to_datetime(["2016-01-01 02:03:04"]).unit + pd.to_datetime(["2016-01-01 02:03:04.123"]).unit + pd.to_datetime(["2016-01-01 02:03:04.123456"]).unit + pd.to_datetime(["2016-01-01 02:03:04.123456789"]).unit + +.. versionchanged:: 3.0.0 + + Previously, :func:`to_datetime` and :class:`DatetimeIndex` would always parse strings to "ns" unit. During pandas 2.x, :class:`Timestamp` could give any of "s", "ms", "us", or "ns" depending on the specificity of the input string. + .. _timeseries.converting.format: Providing a format argument @@ -379,6 +392,16 @@ We subtract the epoch (midnight at January 1, 1970 UTC) and then floor divide by (stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s") +Another common way to perform this conversion is to cast directly to an integer dtype. Note that the exact integers this produces will depend on the specific unit +or resolution of the datetime64 dtype: + +.. ipython:: python + + stamps.astype(np.int64) + stamps.astype("datetime64[s]").astype(np.int64) + stamps.astype("datetime64[ms]").astype(np.int64) + + .. _timeseries.origin: Using the ``origin`` parameter diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 0f7be8cfbcb68..f7799c4e65a8a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -358,7 +358,7 @@ When passing strings, the resolution will depend on the precision of the string, In [5]: pd.to_datetime(["2024-03-22 11:43:01.002003004"]).dtype Out[5]: dtype('<M8[ns]') (Period("2012-01", freq="M"), "period[M]"), (Period("2012-02-01", freq="D"), "period[D]"), ( - Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-01", tz="US/Eastern").as_unit("s"), DatetimeTZDtype(unit="s", tz="US/Eastern"), ), (Timedelta(seconds=500), "timedelta64[ns]"), diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 54920b1a8a2a3..3569a578943d4 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -370,7 +370,7 @@ def unique(values): array([2, 1]) >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])) - array(['2016-01-01T00:00:00'], dtype='datetime64[s]') + array(['2016-01-01T00:00:00.000000'], dtype='datetime64[us]') >>> pd.unique( ... 
pd.Series( diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2f7330d1e81fe..6aace23bf6da0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1923,11 +1923,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: >>> rng_tz.floor("2h", ambiguous=False) DatetimeIndex(['2021-10-31 02:00:00+01:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + dtype='datetime64[us, Europe/Amsterdam]', freq=None) >>> rng_tz.floor("2h", ambiguous=True) DatetimeIndex(['2021-10-31 02:00:00+02:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + dtype='datetime64[us, Europe/Amsterdam]', freq=None) """ _floor_example = """>>> rng.floor('h') @@ -1950,11 +1950,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: >>> rng_tz.floor("2h", ambiguous=False) DatetimeIndex(['2021-10-31 02:00:00+01:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + dtype='datetime64[us, Europe/Amsterdam]', freq=None) >>> rng_tz.floor("2h", ambiguous=True) DatetimeIndex(['2021-10-31 02:00:00+02:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + dtype='datetime64[us, Europe/Amsterdam]', freq=None) """ _ceil_example = """>>> rng.ceil('h') @@ -1977,11 +1977,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: >>> rng_tz.ceil("h", ambiguous=False) DatetimeIndex(['2021-10-31 02:00:00+01:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + dtype='datetime64[us, Europe/Amsterdam]', freq=None) >>> rng_tz.ceil("h", ambiguous=True) DatetimeIndex(['2021-10-31 02:00:00+02:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + dtype='datetime64[us, Europe/Amsterdam]', freq=None) """ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 9b7ae26bef899..96b5683dd2811 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -220,7 +220,7 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): ... 
) ['2023-01-01 00:00:00', '2023-01-02 00:00:00'] - Length: 2, dtype: datetime64[s] + Length: 2, dtype: datetime64[us] """ __module__ = "pandas.arrays" @@ -612,7 +612,7 @@ def tz(self) -> tzinfo | None: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.tz datetime.timezone.utc @@ -1441,7 +1441,7 @@ def time(self) -> npt.NDArray[np.object_]: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.time 0 10:00:00 1 11:00:00 @@ -1484,7 +1484,7 @@ def timetz(self) -> npt.NDArray[np.object_]: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.timetz 0 10:00:00+00:00 1 11:00:00+00:00 @@ -1526,7 +1526,7 @@ def date(self) -> npt.NDArray[np.object_]: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.date 0 2020-01-01 1 2020-02-01 @@ -1875,7 +1875,7 @@ def isocalendar(self) -> DataFrame: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.dayofyear 0 1 1 32 @@ -1911,7 +1911,7 @@ def isocalendar(self) -> DataFrame: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-04-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.quarter 0 1 1 2 @@ -1947,7 +1947,7 @@ def isocalendar(self) -> DataFrame: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.daysinmonth 0 31 1 29 diff --git a/pandas/core/base.py b/pandas/core/base.py index 7d7e43808be5c..d1aa37ed18016 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1380,7 +1380,7 @@ def factorize( 0 2000-03-11 1 2000-03-12 2 2000-03-13 - dtype: datetime64[s] + dtype: datetime64[us] >>> ser.searchsorted('3/14/2000') np.int64(3) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 1dd4ed7100ccf..73d0a0924aa27 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -150,7 +150,7 @@ def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"]) >>> index DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], - dtype='datetime64[s]', freq=None) + dtype='datetime64[us]', freq=None) >>> pd.isna(index) array([False, False, True, False]) @@ -365,7 +365,7 @@ def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"]) >>> index DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], - dtype='datetime64[s]', freq=None) + dtype='datetime64[us]', freq=None) >>> pd.notna(index) array([ True, True, False, True]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 25e0aa6b8f072..491402497420b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6305,7 +6305,7 @@ def dtypes(self): >>> df.dtypes float float64 int int64 - datetime datetime64[s] + datetime datetime64[us] string str dtype: object """ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5918d0f263379..93e04fe61555e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1473,7 +1473,7 @@ def idxmin(self, skipna: bool = True) -> Series: >>> ser.groupby(["a", "a", "b", "b"]).idxmin() a 2023-01-01 b 2023-02-01 - dtype: 
datetime64[s] + dtype: datetime64[us] """ return self._idxmax_idxmin("idxmin", skipna=skipna) @@ -1534,7 +1534,7 @@ def idxmax(self, skipna: bool = True) -> Series: >>> ser.groupby(["a", "a", "b", "b"]).idxmax() a 2023-01-15 b 2023-02-15 - dtype: datetime64[s] + dtype: datetime64[us] """ return self._idxmax_idxmin("idxmax", skipna=skipna) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ec0c2c896423b..d26a9313865cd 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -252,7 +252,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin): >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"]) >>> idx DatetimeIndex(['2020-01-01 10:00:00+00:00', '2020-02-01 11:00:00+00:00'], - dtype='datetime64[s, UTC]', freq=None) + dtype='datetime64[us, UTC]', freq=None) """ _typ = "datetimeindex" diff --git a/pandas/core/series.py b/pandas/core/series.py index 9bbcfe0c913c9..6d7e713b7ad6b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2130,14 +2130,14 @@ def unique(self) -> ArrayLike: >>> pd.Series([pd.Timestamp("2016-01-01") for _ in range(3)]).unique() ['2016-01-01 00:00:00'] - Length: 1, dtype: datetime64[s] + Length: 1, dtype: datetime64[us] >>> pd.Series( ... [pd.Timestamp("2016-01-01", tz="US/Eastern") for _ in range(3)] ... ).unique() ['2016-01-01 00:00:00-05:00'] - Length: 1, dtype: datetime64[s, US/Eastern] + Length: 1, dtype: datetime64[us, US/Eastern] A Categorical will return categories in the order of appearance and with the same dtype. diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index ce311d0c89b55..a2c18ccb59899 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -884,7 +884,7 @@ def to_datetime( >>> pd.to_datetime(df) 0 2015-02-04 1 2016-03-05 - dtype: datetime64[s] + dtype: datetime64[us] Using a unix epoch time @@ -927,14 +927,14 @@ def to_datetime( >>> pd.to_datetime(["2018-10-26 12:00:00", "2018-10-26 13:00:15"]) DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], - dtype='datetime64[s]', freq=None) + dtype='datetime64[us]', freq=None) - Timezone-aware inputs *with constant time offset* are converted to timezone-aware :class:`DatetimeIndex`: >>> pd.to_datetime(["2018-10-26 12:00 -0500", "2018-10-26 13:00 -0500"]) DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'], - dtype='datetime64[s, UTC-05:00]', freq=None) + dtype='datetime64[us, UTC-05:00]', freq=None) - However, timezone-aware inputs *with mixed time offsets* (for example issued from a timezone with daylight savings, such as Europe/Paris) @@ -976,14 +976,14 @@ def to_datetime( >>> pd.to_datetime(["2018-10-26 12:00", "2018-10-26 13:00"], utc=True) DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'], - dtype='datetime64[s, UTC]', freq=None) + dtype='datetime64[us, UTC]', freq=None) - Timezone-aware inputs are *converted* to UTC (the output represents the exact same datetime, but viewed from the UTC time offset `+00:00`). 
>>> pd.to_datetime(["2018-10-26 12:00 -0530", "2018-10-26 12:00 -0500"], utc=True) DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'], - dtype='datetime64[s, UTC]', freq=None) + dtype='datetime64[us, UTC]', freq=None) - Inputs can contain both string or datetime, the above rules still apply diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index cd78dfd6f343a..c327d1b647bce 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -127,7 +127,7 @@ def test_dt64_array(dtype_unit): ( pd.DatetimeIndex(["2000", "2001"]), None, - DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[s]"), + DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[us]"), ), ( ["2000", "2001"], @@ -323,7 +323,7 @@ def test_array_copy(): ([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2])), # datetime ( - [pd.Timestamp("2000"), pd.Timestamp("2001")], + [pd.Timestamp("2000").as_unit("s"), pd.Timestamp("2001").as_unit("s")], DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[s]"), ), ( @@ -342,7 +342,10 @@ def test_array_copy(): ), # datetimetz ( - [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")], + [ + pd.Timestamp("2000", tz="CET").as_unit("s"), + pd.Timestamp("2001", tz="CET").as_unit("s"), + ], DatetimeArray._from_sequence( ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="s") ), diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 2f1ae66299356..62717fda75594 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -445,9 +445,9 @@ def test_to_numpy_dtype(as_series): [ ([1, 2, None], "float64", 0, [1.0, 2.0, 0.0]), ( - [Timestamp("2000"), Timestamp("2000"), pd.NaT], + [Timestamp("2000").as_unit("s"), Timestamp("2000").as_unit("s"), pd.NaT], None, - Timestamp("2000"), + Timestamp("2000").as_unit("s"), [np.datetime64("2000-01-01T00:00:00", "s")] * 3, ), ], @@ -486,10 +486,14 @@ def test_to_numpy_na_value_numpy_dtype( [1, 2, 0, 4], ), ( - [Timestamp("2000"), Timestamp("2000"), pd.NaT], - [(0, Timestamp("2021")), (0, Timestamp("2022")), (1, Timestamp("2000"))], + [Timestamp("2000").as_unit("s"), Timestamp("2000").as_unit("s"), pd.NaT], + [ + (0, Timestamp("2021").as_unit("s")), + (0, Timestamp("2022").as_unit("s")), + (1, Timestamp("2000").as_unit("s")), + ], None, - Timestamp("2000"), + Timestamp("2000").as_unit("s"), [np.datetime64("2000-01-01T00:00:00", "s")] * 3, ), ], diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 679031a625c2d..94caa930900b6 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -155,8 +155,8 @@ def test_infer_dtype_from_scalar_errors(): (1, np.int64), (1.5, np.float64), (np.datetime64("2016-01-01"), np.dtype("M8[s]")), - (Timestamp("20160101"), np.dtype("M8[s]")), - (Timestamp("20160101", tz="UTC"), "datetime64[s, UTC]"), + (Timestamp("20160101").as_unit("s"), np.dtype("M8[s]")), + (Timestamp("20160101", tz="UTC").as_unit("s"), "datetime64[s, UTC]"), ], ) def test_infer_dtype_from_scalar(value, expected, using_infer_string): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 875aa38481ecf..3237831ece24d 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3485,9 +3485,9 @@ def test_string_to_datetime_parsing_cast(): # GH 56266 string_dates = ["2020-01-01 
04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"] result = pd.Series(string_dates, dtype="timestamp[s][pyarrow]") - expected = pd.Series( - ArrowExtensionArray(pa.array(pd.to_datetime(string_dates), from_pandas=True)) - ) + + pd_res = pd.to_datetime(string_dates).as_unit("s") + expected = pd.Series(ArrowExtensionArray(pa.array(pd_res, from_pandas=True))) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 2b36c1135d36d..b6d23a2f9be24 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -820,7 +820,7 @@ def test_setitem_single_column_mixed_datetime(self): columns=["foo", "bar", "baz"], ) - df["timestamp"] = Timestamp("20010102") + df["timestamp"] = Timestamp("20010102").as_unit("s") # check our dtypes result = df.dtypes diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 906c7654ef11f..0de2455079027 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -259,7 +259,7 @@ def test_setitem_dict_preserves_dtypes(self): (Period("2020-01"), PeriodDtype("M")), (Interval(left=0, right=5), IntervalDtype("int64", "right")), ( - Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-01", tz="US/Eastern").as_unit("s"), DatetimeTZDtype(unit="s", tz="US/Eastern"), ), ], diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index e97bb2a98a390..07483a8cd91fe 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -195,14 +195,15 @@ def test_combine_first_convert_datatime_correctly( def test_combine_first_align_nan(self): # GH 7509 (not fixed) - dfa = DataFrame([[pd.Timestamp("2011-01-01"), 2]], columns=["a", "b"]) + ts = pd.Timestamp("2011-01-01").as_unit("s") + dfa = DataFrame([[ts, 2]], columns=["a", "b"]) dfb = DataFrame([[4], [5]], columns=["b"]) assert dfa["a"].dtype == "datetime64[s]" assert dfa["b"].dtype == "int64" res = dfa.combine_first(dfb) exp = DataFrame( - {"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2, 5]}, + {"a": [ts, pd.NaT], "b": [2, 5]}, columns=["a", "b"], ) tm.assert_frame_equal(res, exp) diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py index 666fcb1b5143b..9121799cd2019 100644 --- a/pandas/tests/frame/methods/test_get_numeric_data.py +++ b/pandas/tests/frame/methods/test_get_numeric_data.py @@ -25,7 +25,7 @@ def test_get_numeric_data(self, using_infer_string): objectname = np.dtype(np.object_).name df = DataFrame( - {"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")}, + {"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102").as_unit("s")}, index=np.arange(10), ) result = df.dtypes diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 3973f47aed240..8ace0a48c8e28 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -140,7 +140,7 @@ def test_reindex_tzaware_fill_value(self): # GH#52586 df = DataFrame([[1]]) - ts = pd.Timestamp("2023-04-10 17:32", tz="US/Pacific") + ts = pd.Timestamp("2023-04-10 17:32", tz="US/Pacific").as_unit("s") res = df.reindex([0, 1], axis=1, fill_value=ts) assert res.dtypes[1] == pd.DatetimeTZDtype(unit="s", tz="US/Pacific") expected = DataFrame({0: [1], 1: [ts]}) diff 
--git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index e5fddcb23c60b..e2bf84e3bd8c3 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -50,7 +50,7 @@ def test_to_csv_from_csv1_datetime(self, temp_file, datetime_frame): datetime_frame.to_csv(path) recons = self.read_csv(path, parse_dates=True) expected = datetime_frame.copy() - expected.index = expected.index.as_unit("s") + expected.index = expected.index.as_unit("us") tm.assert_frame_equal(expected, recons) datetime_frame.to_csv(path, index_label="index") @@ -240,8 +240,8 @@ def make_dtnat_arr(n, nnat=None): result = self.read_csv(path).apply(to_datetime) expected = df[:] - expected["a"] = expected["a"].astype("M8[s]") - expected["b"] = expected["b"].astype("M8[s]") + expected["a"] = expected["a"].astype("M8[us]") + expected["b"] = expected["b"].astype("M8[us]") tm.assert_frame_equal(result, expected, check_names=False) def _return_result_expected( @@ -585,7 +585,7 @@ def test_to_csv_multiindex(self, temp_file, float_frame, datetime_frame): # TODO to_csv drops column name expected = tsframe.copy() - expected.index = MultiIndex.from_arrays([old_index.as_unit("s"), new_index[1]]) + expected.index = MultiIndex.from_arrays([old_index.as_unit("us"), new_index[1]]) tm.assert_frame_equal(recons, expected, check_names=False) # do not load index diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py index f68d7f533645d..aace5dfaa8dff 100644 --- a/pandas/tests/frame/methods/test_to_numpy.py +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -55,11 +55,15 @@ def test_to_numpy_datetime_with_na(self): df = DataFrame( { - "a": [Timestamp("1970-01-01"), Timestamp("1970-01-02"), NaT], + "a": [ + Timestamp("1970-01-01").as_unit("s"), + Timestamp("1970-01-02").as_unit("s"), + NaT, + ], "b": [ - Timestamp("1970-01-01"), + Timestamp("1970-01-01").as_unit("s"), np.nan, - Timestamp("1970-01-02"), + Timestamp("1970-01-02").as_unit("s"), ], "c": [ 1, diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index c525a3c6494c5..0caaafec909ac 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -194,7 +194,7 @@ def test_construction_with_conversions(self): expected = DataFrame( { - "dt1": Timestamp("20130101"), + "dt1": Timestamp("20130101").as_unit("s"), "dt2": date_range("20130101", periods=3).astype("M8[s]"), # 'dt3' : date_range('20130101 00:00:01',periods=3,freq='s'), # FIXME: don't leave commented-out diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 264011edb65b5..896a795adac43 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -879,9 +879,10 @@ def create_data(constructor): ) result_datetime64 = DataFrame(data_datetime64) + assert result_datetime64.index.unit == "s" + result_datetime64.index = result_datetime64.index.as_unit("us") result_datetime = DataFrame(data_datetime) assert result_datetime.index.unit == "us" - result_datetime.index = result_datetime.index.as_unit("s") result_Timestamp = DataFrame(data_Timestamp) tm.assert_frame_equal(result_datetime64, expected) tm.assert_frame_equal(result_datetime, expected) @@ -944,7 +945,7 @@ def test_constructor_dict_extension_scalar(self, ea_scalar_and_dtype): (Period("2020-01"), PeriodDtype("M")), (Interval(left=0, right=5), IntervalDtype("int64", "right")), ( - 
Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-01", tz="US/Eastern").as_unit("s"), DatetimeTZDtype(unit="s", tz="US/Eastern"), ), ], @@ -1849,7 +1850,7 @@ def test_constructor_with_datetimes(self, using_infer_string): "A": 1, "B": "foo", "C": "bar", - "D": Timestamp("20010101"), + "D": Timestamp("20010101").as_unit("s"), "E": datetime(2001, 1, 2, 0, 0), }, index=np.arange(10), @@ -3076,9 +3077,9 @@ def test_from_tzaware_mixed_object_array(self): res = DataFrame(arr, columns=["A", "B", "C"]) expected_dtypes = [ - "datetime64[s]", - "datetime64[s, US/Eastern]", - "datetime64[s, CET]", + "datetime64[us]", + "datetime64[us, US/Eastern]", + "datetime64[us, CET]", ] assert (res.dtypes == expected_dtypes).all() diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 5ef36331a20fa..14652e863b5c6 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -738,7 +738,7 @@ def test_groupby_first_datetime64(self): def test_groupby_max_datetime64(self): # GH 5869 # datetimelike dtype conversion from int - df = DataFrame({"A": Timestamp("20130101"), "B": np.arange(5)}) + df = DataFrame({"A": Timestamp("20130101").as_unit("s"), "B": np.arange(5)}) # TODO: can we retain second reso in .apply here? expected = df.groupby("A")["A"].apply(lambda x: x.max()).astype("M8[s]") result = df.groupby("A")["A"].max() diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index c418b2a18008b..d3de0648a6ba3 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -524,7 +524,7 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), Timestamp("2011-01-02 10:00", tz="US/Eastern"), ], - dtype="M8[s, US/Eastern]", + dtype="M8[us, US/Eastern]", name="idx", ) tm.assert_index_equal(dti, expected) @@ -602,7 +602,7 @@ def test_constructor_coverage(self): expected = DatetimeIndex(strings.astype("O")) tm.assert_index_equal(result, expected) - from_ints = DatetimeIndex(expected.as_unit("ns").asi8).as_unit("s") + from_ints = DatetimeIndex(expected.as_unit("ns").asi8).as_unit("us") tm.assert_index_equal(from_ints, expected) # string with NaT @@ -611,7 +611,7 @@ def test_constructor_coverage(self): expected = DatetimeIndex(strings.astype("O")) tm.assert_index_equal(result, expected) - from_ints = DatetimeIndex(expected.as_unit("ns").asi8).as_unit("s") + from_ints = DatetimeIndex(expected.as_unit("ns").asi8).as_unit("us") tm.assert_index_equal(from_ints, expected) # non-conforming @@ -940,11 +940,11 @@ def test_dti_tz_constructors(self, tzstr): idx1 = to_datetime(arr).tz_localize(tzstr) idx2 = date_range( - start="2005-11-10 08:00:00", freq="h", periods=2, tz=tzstr, unit="s" + start="2005-11-10 08:00:00", freq="h", periods=2, tz=tzstr, unit="us" ) idx2 = idx2._with_freq(None) # the others all have freq=None - idx3 = DatetimeIndex(arr, tz=tzstr).as_unit("s") - idx4 = DatetimeIndex(np.array(arr), tz=tzstr).as_unit("s") + idx3 = DatetimeIndex(arr, tz=tzstr) + idx4 = DatetimeIndex(np.array(arr), tz=tzstr) tm.assert_index_equal(idx1, idx2) tm.assert_index_equal(idx1, idx3) @@ -1198,9 +1198,9 @@ def test_dti_constructor_object_dtype_dayfirst_yearfirst_with_tz(self): yfirst = Timestamp(2005, 10, 16, tz="US/Pacific") result1 = DatetimeIndex([val], tz="US/Pacific", dayfirst=True) - expected1 = DatetimeIndex([dfirst]).as_unit("s") + expected1 = 
DatetimeIndex([dfirst]) tm.assert_index_equal(result1, expected1) result2 = DatetimeIndex([val], tz="US/Pacific", yearfirst=True) - expected2 = DatetimeIndex([yfirst]).as_unit("s") + expected2 = DatetimeIndex([yfirst]) tm.assert_index_equal(result2, expected2) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index cf0bc2b872e1a..eedcc6d4e8819 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -210,14 +210,21 @@ def test_insert_float_index( @pytest.mark.parametrize( "fill_val,exp_dtype", [ - (pd.Timestamp("2012-01-01"), "datetime64[ns]"), - (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), + (pd.Timestamp("2012-01-01").as_unit("s"), "datetime64[ns]"), + ( + pd.Timestamp("2012-01-01", tz="US/Eastern").as_unit("s"), + "datetime64[ns, US/Eastern]", + ), ], ids=["datetime64", "datetime64tz"], ) @pytest.mark.parametrize( "insert_value", - [pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1], + [ + pd.Timestamp("2012-01-01").as_unit("s"), + pd.Timestamp("2012-01-01", tz="Asia/Tokyo").as_unit("s"), + 1, + ], ) def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value): obj = pd.DatetimeIndex( @@ -233,13 +240,13 @@ def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value): if fill_val.tz: # mismatched tzawareness - ts = pd.Timestamp("2012-01-01") + ts = pd.Timestamp("2012-01-01").as_unit("s") result = obj.insert(1, ts) expected = obj.astype(object).insert(1, ts) assert expected.dtype == object tm.assert_index_equal(result, expected) - ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") + ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo").as_unit("s") result = obj.insert(1, ts) # once deprecation is enforced: expected = obj.insert(1, ts.tz_convert(obj.dtype.tz)) @@ -248,7 +255,7 @@ def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value): else: # mismatched tzawareness - ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") + ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo").as_unit("s") result = obj.insert(1, ts) expected = obj.astype(object).insert(1, ts) assert expected.dtype == object @@ -271,7 +278,7 @@ def test_insert_index_timedelta64(self): obj, pd.Timedelta("10 day"), exp, "timedelta64[ns]" ) - for item in [pd.Timestamp("2012-01-01"), 1]: + for item in [pd.Timestamp("2012-01-01").as_unit("s"), 1]: result = obj.insert(1, item) expected = obj.astype(object).insert(1, item) assert expected.dtype == object @@ -281,7 +288,11 @@ def test_insert_index_timedelta64(self): "insert, coerced_val, coerced_dtype", [ (pd.Period("2012-01", freq="M"), "2012-01", "period[M]"), - (pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), object), + ( + pd.Timestamp("2012-01-01").as_unit("s"), + pd.Timestamp("2012-01-01").as_unit("s"), + object, + ), (1, 1, object), ("x", "x", object), ], @@ -420,8 +431,8 @@ def test_where_series_bool(self, index_or_series, fill_val, exp_dtype): @pytest.mark.parametrize( "fill_val,exp_dtype", [ - (pd.Timestamp("2012-01-01"), "datetime64[ns]"), - (pd.Timestamp("2012-01-01", tz="US/Eastern"), object), + (pd.Timestamp("2012-01-01").as_unit("s"), "datetime64[ns]"), + (pd.Timestamp("2012-01-01", tz="US/Eastern").as_unit("s"), object), ], ids=["datetime64", "datetime64tz"], ) @@ -561,8 +572,8 @@ def test_fillna_complex128(self, index_or_series, fill_val, fill_dtype): @pytest.mark.parametrize( "fill_val,fill_dtype", [ - (pd.Timestamp("2012-01-01"), "datetime64[s]"), - (pd.Timestamp("2012-01-01", 
tz="US/Eastern"), object), + (pd.Timestamp("2012-01-01").as_unit("s"), "datetime64[s]"), + (pd.Timestamp("2012-01-01", tz="US/Eastern").as_unit("s"), object), (1, object), ("x", object), ], @@ -572,20 +583,20 @@ def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype): klass = index_or_series obj = klass( [ - pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-01").as_unit("s"), pd.NaT, - pd.Timestamp("2011-01-03"), - pd.Timestamp("2011-01-04"), + pd.Timestamp("2011-01-03").as_unit("s"), + pd.Timestamp("2011-01-04").as_unit("s"), ] ) assert obj.dtype == "datetime64[s]" exp = klass( [ - pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-01").as_unit("s"), fill_val, - pd.Timestamp("2011-01-03"), - pd.Timestamp("2011-01-04"), + pd.Timestamp("2011-01-03").as_unit("s"), + pd.Timestamp("2011-01-04").as_unit("s"), ] ) self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) @@ -593,10 +604,16 @@ def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype): @pytest.mark.parametrize( "fill_val,fill_dtype", [ - (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[s, US/Eastern]"), - (pd.Timestamp("2012-01-01"), object), + ( + pd.Timestamp("2012-01-01", tz="US/Eastern").as_unit("s"), + "datetime64[s, US/Eastern]", + ), + (pd.Timestamp("2012-01-01").as_unit("s"), object), # pre-2.0 with a mismatched tz we would get object result - (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), "datetime64[s, US/Eastern]"), + ( + pd.Timestamp("2012-01-01", tz="Asia/Tokyo").as_unit("s"), + "datetime64[s, US/Eastern]", + ), (1, object), ("x", object), ], @@ -607,10 +624,10 @@ def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype): obj = klass( [ - pd.Timestamp("2011-01-01", tz=tz), + pd.Timestamp("2011-01-01", tz=tz).as_unit("s"), pd.NaT, - pd.Timestamp("2011-01-03", tz=tz), - pd.Timestamp("2011-01-04", tz=tz), + pd.Timestamp("2011-01-03", tz=tz).as_unit("s"), + pd.Timestamp("2011-01-04", tz=tz).as_unit("s"), ] ) assert obj.dtype == "datetime64[s, US/Eastern]" @@ -621,10 +638,10 @@ def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype): fv = fill_val.tz_convert(tz) exp = klass( [ - pd.Timestamp("2011-01-01", tz=tz), + pd.Timestamp("2011-01-01", tz=tz).as_unit("s"), fv, - pd.Timestamp("2011-01-03", tz=tz), - pd.Timestamp("2011-01-04", tz=tz), + pd.Timestamp("2011-01-03", tz=tz).as_unit("s"), + pd.Timestamp("2011-01-04", tz=tz).as_unit("s"), ] ) self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) @@ -637,8 +654,8 @@ def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype): 1 + 1j, True, pd.Interval(1, 2, closed="left"), - pd.Timestamp("2012-01-01", tz="US/Eastern"), - pd.Timestamp("2012-01-01"), + pd.Timestamp("2012-01-01", tz="US/Eastern").as_unit("s"), + pd.Timestamp("2012-01-01").as_unit("s"), pd.Timedelta(days=1), pd.Period("2016-01-01", "D"), ], @@ -681,8 +698,8 @@ def test_fillna_series_timedelta64(self): 1 + 1j, True, pd.Interval(1, 2, closed="left"), - pd.Timestamp("2012-01-01", tz="US/Eastern"), - pd.Timestamp("2012-01-01"), + pd.Timestamp("2012-01-01", tz="US/Eastern").as_unit("s"), + pd.Timestamp("2012-01-01").as_unit("s"), pd.Timedelta(days=1), pd.Period("2016-01-01", "W"), ], @@ -716,14 +733,17 @@ class TestReplaceSeriesCoercion(CoercionBase): rep["float64"] = [1.1, 2.2] rep["complex128"] = [1 + 1j, 2 + 2j] rep["bool"] = [True, False] - rep["datetime64[ns]"] = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-03")] + rep["datetime64[ns]"] = [ + pd.Timestamp("2011-01-01").as_unit("s"), + 
pd.Timestamp("2011-01-03").as_unit("s"), + ] for tz in ["UTC", "US/Eastern"]: # to test tz => different tz replacement key = f"datetime64[ns, {tz}]" rep[key] = [ - pd.Timestamp("2011-01-01", tz=tz), - pd.Timestamp("2011-01-03", tz=tz), + pd.Timestamp("2011-01-01", tz=tz).as_unit("s"), + pd.Timestamp("2011-01-03", tz=tz).as_unit("s"), ] rep["timedelta64[ns]"] = [pd.Timedelta("1 day"), pd.Timedelta("2 day")] diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 8d59b0c026e0c..1877746aa9c97 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -721,7 +721,7 @@ def test_loc_modify_datetime(self): {"date": [1485264372711, 1485265925110, 1540215845888, 1540282121025]} ) - df["date_dt"] = to_datetime(df["date"], unit="ms", cache=True).dt.as_unit("ms") + df["date_dt"] = to_datetime(df["date"], unit="ms", cache=True).dt.as_unit("us") df.loc[:, "date_dt_cp"] = df.loc[:, "date_dt"] df.loc[[2, 3], "date_dt_cp"] = df.loc[[2, 3], "date_dt"] diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 6f20d0e4e7cbf..0e75e3b3f8a3f 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -577,9 +577,9 @@ def test_partial_set_invalid(self): date_range(start="2000", periods=20, freq="D", unit="s"), ["2000-01-04", "2000-01-08", "2000-01-12"], [ - Timestamp("2000-01-04"), - Timestamp("2000-01-08"), - Timestamp("2000-01-12"), + Timestamp("2000-01-04").as_unit("s"), + Timestamp("2000-01-08").as_unit("s"), + Timestamp("2000-01-12").as_unit("s"), ], ), ( diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index fca63b1709dce..a50b1f4b5c306 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -136,7 +136,9 @@ def df_ref(datapath): def get_exp_unit(read_ext: str, engine: str | None) -> str: unit = "us" - if (read_ext == ".ods") ^ (engine == "calamine"): + if read_ext == ".ods" and engine == "odf": + pass + elif (read_ext == ".ods") ^ (engine == "calamine"): unit = "s" return unit diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 5e23095f9abbe..ab95ecf827e6e 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -38,8 +38,6 @@ def get_exp_unit(path: str) -> str: - if path.endswith(".ods"): - return "s" return "us" @@ -297,13 +295,13 @@ def test_read_excel_parse_dates(self, tmp_excel): res = pd.read_excel(tmp_excel, parse_dates=["date_strings"], index_col=0) expected = df[:] - expected["date_strings"] = expected["date_strings"].astype("M8[s]") + expected["date_strings"] = expected["date_strings"].astype("M8[us]") tm.assert_frame_equal(res, expected) res = pd.read_excel( tmp_excel, parse_dates=["date_strings"], date_format="%m/%d/%Y", index_col=0 ) - expected["date_strings"] = expected["date_strings"].astype("M8[s]") + expected["date_strings"] = expected["date_strings"].astype("M8[us]") tm.assert_frame_equal(expected, res) def test_multiindex_interval_datetimes(self, tmp_excel): @@ -356,17 +354,15 @@ def test_excel_round_trip_with_periodindex(self, tmp_excel, merge_cells): MultiIndex.from_arrays( [ [ - pd.to_datetime("2006-10-06 00:00:00"), - pd.to_datetime("2006-10-07 00:00:00"), + pd.to_datetime("2006-10-06 00:00:00").as_unit("s"), + pd.to_datetime("2006-10-07 00:00:00").as_unit("s"), ], ["X", "Y"], ], names=["date", "category"], ), ) - time_format = ( - "datetime64[s]" if tmp_excel.endswith(".ods") 
else "datetime64[us]" - ) + time_format = "datetime64[us]" expected.index = expected.index.set_levels( expected.index.levels[0].astype(time_format), level=0 ) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9c93be0937e91..44ba18c2cd183 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -128,7 +128,10 @@ def test_frame_non_unique_index_raises(self, orient): [["a", "b"], ["c", "d"]], [[1.5, 2.5], [3.5, 4.5]], [[1, 2.5], [3, 4.5]], - [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]], + [ + [Timestamp("20130101").as_unit("s"), 3.5], + [Timestamp("20130102").as_unit("s"), 4.5], + ], ], ) def test_frame_non_unique_columns(self, orient, data, request): diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 5a814930fe8bd..981c254652f07 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -64,10 +64,12 @@ def test_read_csv_local(all_parsers, csv1): datetime(2000, 1, 10), datetime(2000, 1, 11), ], - dtype="M8[s]", + dtype="M8[us]", name="index", ), ) + if parser.engine == "pyarrow": + expected.index = expected.index.astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -183,10 +185,12 @@ def test_read_csv_dataframe(all_parsers, csv1): datetime(2000, 1, 10), datetime(2000, 1, 11), ], - dtype="M8[s]", + dtype="M8[us]", name="index", ), ) + if parser.engine == "pyarrow": + expected.index = expected.index.astype("M8[s]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index cfa8785b24bde..7570a4df1e89f 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -263,7 +263,7 @@ def test_read_csv_no_index_name(all_parsers, csv_dir_path): datetime(2000, 1, 6), datetime(2000, 1, 7), ], - dtype="M8[s]", + dtype="M8[us]", ), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_multi_thread.py b/pandas/tests/io/parser/test_multi_thread.py index a0ccae5a84941..fa9465c9a66af 100644 --- a/pandas/tests/io/parser/test_multi_thread.py +++ b/pandas/tests/io/parser/test_multi_thread.py @@ -155,5 +155,5 @@ def test_multi_thread_path_multipart_read_csv(tmp_path, all_parsers): result = _generate_multi_thread_dataframe(parser, path, num_rows, num_tasks) expected = df[:] - expected["date"] = expected["date"].astype("M8[s]") + expected["date"] = expected["date"].astype("M8[us]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 034e5d3f811a2..7aed7acb8e50d 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -63,7 +63,7 @@ def test_date_col_as_index_col(all_parsers): datetime(1999, 1, 27, 21, 0), datetime(1999, 1, 27, 22, 0), ], - dtype="M8[s]", + dtype="M8[us]", name="X1", ) expected = DataFrame( @@ -128,7 +128,7 @@ def test_parse_dates_string(all_parsers): parser = all_parsers result = parser.read_csv(StringIO(data), index_col="date", parse_dates=["date"]) # freq doesn't round-trip - index = date_range("1/1/2009", periods=3, name="date", unit="s")._with_freq(None) + index = date_range("1/1/2009", periods=3, name="date", unit="us")._with_freq(None) expected = DataFrame( {"A": ["a", "b", "c"], "B": [1, 3, 4], "C": [2, 4, 5]}, index=index @@ -145,8 +145,8 @@ def 
test_parse_dates_column_list(all_parsers, parse_dates): expected = DataFrame( {"a": [datetime(2010, 1, 1)], "b": [1], "c": [datetime(2010, 2, 15)]} ) - expected["a"] = expected["a"].astype("M8[s]") - expected["c"] = expected["c"].astype("M8[s]") + expected["a"] = expected["a"].astype("M8[us]") + expected["c"] = expected["c"].astype("M8[us]") expected = expected.set_index(["a", "b"]) result = parser.read_csv( @@ -170,7 +170,7 @@ def test_multi_index_parse_dates(all_parsers, index_col): 20090103,three,c,4,5 """ parser = all_parsers - dti = date_range("2009-01-01", periods=3, freq="D", unit="s") + dti = date_range("2009-01-01", periods=3, freq="D", unit="us") index = MultiIndex.from_product( [ dti, @@ -220,6 +220,7 @@ def test_parse_tz_aware(all_parsers): if parser.engine == "pyarrow": pytz = pytest.importorskip("pytz") expected_tz = pytz.utc + expected.index = expected.index.as_unit("s") else: expected_tz = timezone.utc tm.assert_frame_equal(result, expected) @@ -304,7 +305,6 @@ def test_parse_dates_empty_string(all_parsers): expected = DataFrame( [[datetime(2012, 1, 1), 1], [pd.NaT, 2]], columns=["Date", "test"] ) - expected["Date"] = expected["Date"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -315,13 +315,14 @@ def test_parse_dates_empty_string(all_parsers): ( "a\n04.15.2016", {"parse_dates": ["a"]}, - DataFrame([datetime(2016, 4, 15)], columns=["a"], dtype="M8[s]"), + DataFrame([datetime(2016, 4, 15)], columns=["a"], dtype="M8[us]"), ), ( "a\n04.15.2016", {"parse_dates": True, "index_col": 0}, DataFrame( - index=DatetimeIndex(["2016-04-15"], dtype="M8[s]", name="a"), columns=[] + index=DatetimeIndex(["2016-04-15"], dtype="M8[us]", name="a"), + columns=[], ), ), ( @@ -329,7 +330,7 @@ def test_parse_dates_empty_string(all_parsers): {"parse_dates": ["a", "b"]}, DataFrame( [[datetime(2016, 4, 15), datetime(2013, 9, 16)]], - dtype="M8[s]", + dtype="M8[us]", columns=["a", "b"], ), ), @@ -340,8 +341,8 @@ def test_parse_dates_empty_string(all_parsers): index=MultiIndex.from_tuples( [ ( - Timestamp(2016, 4, 15).as_unit("s"), - Timestamp(2013, 9, 16).as_unit("s"), + Timestamp(2016, 4, 15), + Timestamp(2013, 9, 16), ) ], names=["a", "b"], @@ -412,7 +413,7 @@ def test_parse_timezone(all_parsers): end="2018-01-04 09:05:00", freq="1min", tz=timezone(timedelta(minutes=540)), - unit="s", + unit="us", )._with_freq(None) expected_data = {"dt": dti, "val": [23350, 23400, 23400, 23400, 23400]} @@ -451,7 +452,7 @@ def test_parse_delimited_date_swap_no_warning( all_parsers, date_string, dayfirst, expected, request ): parser = all_parsers - expected = DataFrame({0: [expected]}, dtype="datetime64[s]") + expected = DataFrame({0: [expected]}, dtype="datetime64[us]") if parser.engine == "pyarrow": if not dayfirst: # "CSV parse error: Empty CSV file or block" @@ -484,7 +485,7 @@ def test_parse_delimited_date_swap_with_warning( all_parsers, date_string, dayfirst, expected ): parser = all_parsers - expected = DataFrame({0: [expected]}, dtype="datetime64[s]") + expected = DataFrame({0: [expected]}, dtype="datetime64[us]") warning_msg = ( "Parsing dates in .* format when dayfirst=.* was specified. " "Pass `dayfirst=.*` or specify a format to silence this warning." 
@@ -603,7 +604,6 @@ def test_date_parser_usecols_thousands(all_parsers): thousands="-", ) expected = DataFrame({"B": [3, 4], "C": [Timestamp("20-09-2001 01:00:00")] * 2}) - expected["C"] = expected["C"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -613,7 +613,7 @@ def test_dayfirst_warnings(): # CASE 1: valid input input = "date\n31/12/2014\n10/03/2011" expected = DatetimeIndex( - ["2014-12-31", "2011-03-10"], dtype="datetime64[s]", freq=None, name="date" + ["2014-12-31", "2011-03-10"], dtype="datetime64[us]", freq=None, name="date" ) warning_msg = ( "Parsing dates in .* format when dayfirst=.* was specified. " @@ -674,7 +674,7 @@ def test_dayfirst_warnings_no_leading_zero(date_string, dayfirst): # GH47880 initial_value = f"date\n{date_string}" expected = DatetimeIndex( - ["2014-01-31"], dtype="datetime64[s]", freq=None, name="date" + ["2014-01-31"], dtype="datetime64[us]", freq=None, name="date" ) warning_msg = ( "Parsing dates in .* format when dayfirst=.* was specified. " @@ -730,7 +730,7 @@ def test_replace_nans_before_parsing_dates(all_parsers): Timestamp("2017-09-09"), ] }, - dtype="M8[s]", + dtype="M8[us]", ) tm.assert_frame_equal(result, expected) @@ -745,7 +745,6 @@ def test_parse_dates_and_string_dtype(all_parsers): result = parser.read_csv(StringIO(data), dtype="string", parse_dates=["b"]) expected = DataFrame({"a": ["1"], "b": [Timestamp("2019-12-31")]}) expected["a"] = expected["a"].astype("string") - expected["b"] = expected["b"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -765,7 +764,7 @@ def test_parse_dot_separated_dates(all_parsers): else: expected_index = DatetimeIndex( ["2003-03-27 14:55:00", "2003-08-03 15:20:00"], - dtype="datetime64[ms]", + dtype="datetime64[us]", name="a", ) warn = UserWarning @@ -799,7 +798,7 @@ def test_parse_dates_dict_format(all_parsers): "a": [Timestamp("2019-12-31"), Timestamp("2020-12-31")], "b": [Timestamp("2019-12-31"), Timestamp("2020-12-31")], }, - dtype="M8[s]", + dtype="M8[us]", ) tm.assert_frame_equal(result, expected) @@ -841,6 +840,8 @@ def test_parse_dates_arrow_engine(all_parsers): "b": 1, } ) + if parser.engine == "pyarrow": + expected["a"] = expected["a"].astype("M8[s]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 4b78048a3a073..ae6eef3ba0d48 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -307,7 +307,6 @@ def test_fwf_regression(): parse_dates=True, date_format="%Y%j%H%M%S", ) - expected.index = expected.index.astype("M8[s]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py index 99642ee4befc6..619b53abec8c7 100644 --- a/pandas/tests/io/parser/test_skiprows.py +++ b/pandas/tests/io/parser/test_skiprows.py @@ -43,7 +43,7 @@ def test_skip_rows_bug(all_parsers, skiprows): ) index = Index( [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], - dtype="M8[s]", + dtype="M8[us]", name=0, ) @@ -88,7 +88,7 @@ def test_skip_rows_blank(all_parsers): ) index = Index( [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], - dtype="M8[s]", + dtype="M8[us]", name=0, ) diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 479f2468a86ab..308fae487f842 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -826,7 +826,7 @@ def test_append_raise(setup_path, 
using_infer_string): msg = re.escape( "Cannot serialize the column [foo] " "because its data contents are not [string] " - "but [datetime64[s]] object dtype" + "but [datetime64[us]] object dtype" ) with pytest.raises(ValueError, match=msg): store.append("df", df) diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index de2845dd99262..aacdfb1f82f6d 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -231,9 +231,9 @@ def test_table_values_dtypes_roundtrip(temp_file, using_infer_string): df1["float322"] = 1.0 df1["float322"] = df1["float322"].astype("float32") df1["bool"] = df1["float32"] > 0 - df1["time_s_1"] = Timestamp("20130101") - df1["time_s_2"] = Timestamp("20130101 00:00:00") - df1["time_ms"] = Timestamp("20130101 00:00:00.000") + df1["time_s_1"] = Timestamp("20130101").as_unit("s") + df1["time_s_2"] = Timestamp("20130101 00:00:00").as_unit("s") + df1["time_ms"] = Timestamp("20130101 00:00:00.000").as_unit("ms") df1["time_ns"] = Timestamp("20130102 00:00:00.000000000") store.append("df_mixed_dtypes1", df1) diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index a452d9136eed7..5d76a622d2914 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -78,7 +78,7 @@ def test_read_csv(cleared_fs, df1): df2 = read_csv("memory://test/test.csv", parse_dates=["dt"]) expected = df1.copy() - expected["dt"] = expected["dt"].astype("M8[s]") + expected["dt"] = expected["dt"].astype("M8[us]") tm.assert_frame_equal(df2, expected) @@ -103,7 +103,7 @@ def test_to_csv(cleared_fs, df1): df2 = read_csv("memory://test/test.csv", parse_dates=["dt"], index_col=0) expected = df1.copy() - expected["dt"] = expected["dt"].astype("M8[s]") + expected["dt"] = expected["dt"].astype("M8[us]") tm.assert_frame_equal(df2, expected) @@ -116,7 +116,7 @@ def test_to_excel(cleared_fs, df1): df2 = read_excel(path, parse_dates=["dt"], index_col=0) expected = df1.copy() - expected["dt"] = expected["dt"].astype("M8[s]") + expected["dt"] = expected["dt"].astype("M8[us]") tm.assert_frame_equal(df2, expected) @@ -140,7 +140,7 @@ def test_to_csv_fsspec_object(cleared_fs, binary_mode, df1): assert not fsspec_object.closed expected = df1.copy() - expected["dt"] = expected["dt"].astype("M8[s]") + expected["dt"] = expected["dt"].astype("M8[us]") tm.assert_frame_equal(df2, expected) diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 7702003ccd1e2..ad58fccbf4102 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -119,7 +119,7 @@ def from_uri(path): expected = df1[:] if format in ["csv", "excel"]: - expected["dt"] = expected["dt"].dt.as_unit("s") + expected["dt"] = expected["dt"].dt.as_unit("us") tm.assert_frame_equal(df2, expected) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 535643bf35205..81d7f80c69040 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1065,7 +1065,7 @@ def test_parse_dates_list(self, flavor_read_html): df = DataFrame({"date": date_range("1/1/2001", periods=10)}) expected = df[:] - expected["date"] = expected["date"].dt.as_unit("s") + expected["date"] = expected["date"].dt.as_unit("us") str_df = df.to_html() res = flavor_read_html(StringIO(str_df), parse_dates=[1], index_col=0) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 2927b24624026..e450cd76f3ad3 100644 --- a/pandas/tests/io/test_parquet.py +++ 
b/pandas/tests/io/test_parquet.py @@ -709,7 +709,6 @@ def test_parquet_read_from_url(self, httpserver, datapath, df_compat, engine): class TestParquetPyArrow(Base): - @pytest.mark.xfail(reason="datetime_with_nat unit doesn't round-trip") def test_basic(self, pa, df_full, temp_file): df = df_full pytest.importorskip("pyarrow", "11.0.0") @@ -747,7 +746,7 @@ def test_to_bytes_without_path_or_buf_provided(self, pa, df_full): expected = df_full.copy() expected.loc[1, "string_with_nan"] = None - expected["datetime_with_nat"] = expected["datetime_with_nat"].astype("M8[ms]") + expected["datetime_with_nat"] = expected["datetime_with_nat"].astype("M8[us]") tm.assert_frame_equal(res, expected) def test_duplicate_columns(self, pa, temp_file): @@ -1060,7 +1059,7 @@ def test_read_dtype_backend_pyarrow_config(self, pa, df_full, temp_file): pa_table = pyarrow.Table.from_pandas(df) expected = pa_table.to_pandas(types_mapper=pd.ArrowDtype) expected["datetime_with_nat"] = expected["datetime_with_nat"].astype( - "timestamp[ms][pyarrow]" + "timestamp[us][pyarrow]" ) check_round_trip( diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 5865c46b4031e..66beb5013130c 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1831,7 +1831,7 @@ def test_api_custom_dateparsing_error( pytest.mark.xfail(reason="failing combination of arguments") ) - expected = types_data_frame.astype({"DateCol": "datetime64[s]"}) + expected = types_data_frame.astype({"DateCol": "datetime64[us]"}) result = read_sql( text, @@ -1854,12 +1854,6 @@ def test_api_custom_dateparsing_error( } ) - if conn_name == "postgresql_adbc_types" and pa_version_under14p1: - expected["DateCol"] = expected["DateCol"].astype("datetime64[ns]") - elif "postgres" in conn_name or "mysql" in conn_name: - expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") - else: - expected["DateCol"] = expected["DateCol"].astype("datetime64[s]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 20dbc6d2b4aeb..cef50e93b55bc 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -250,7 +250,7 @@ def test_resample_empty_sum_string(string_dtype_no_object, min_count): result = rs.sum(min_count=min_count) value = "" if min_count == 0 else pd.NA - index = date_range(start="2000-01-01", freq="20s", periods=2, unit="s") + index = date_range(start="2000-01-01", freq="20s", periods=2, unit="us") expected = Series(value, index=index, dtype=dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index f49ff8310f855..770e5cc3d2214 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -22,14 +22,14 @@ "category": Categorical(["X", "Y", "Z"]), "object": ["a", "b", "c"], "datetime64[s]": [ - pd.Timestamp("2011-01-01"), - pd.Timestamp("2011-01-02"), - pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-01").as_unit("s"), + pd.Timestamp("2011-01-02").as_unit("s"), + pd.Timestamp("2011-01-03").as_unit("s"), ], "datetime64[s, US/Eastern]": [ - pd.Timestamp("2011-01-01", tz="US/Eastern"), - pd.Timestamp("2011-01-02", tz="US/Eastern"), - pd.Timestamp("2011-01-03", tz="US/Eastern"), + pd.Timestamp("2011-01-01", tz="US/Eastern").as_unit("s"), + pd.Timestamp("2011-01-02", tz="US/Eastern").as_unit("s"), + pd.Timestamp("2011-01-03", 
tz="US/Eastern").as_unit("s"), ], "timedelta64[ns]": [ pd.Timedelta("1 days"), diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index b1cba7ee31eac..198c652b43365 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -362,7 +362,7 @@ def test_concat_tz_series_tzlocal(self): result = concat([Series(x), Series(y)], ignore_index=True) tm.assert_series_equal(result, Series(x + y)) - assert result.dtype == "datetime64[s, tzlocal()]" + assert result.dtype == "datetime64[us, tzlocal()]" def test_concat_tz_series_with_datetimelike(self): # see gh-12620: tz and timedelta diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 5b535ad0061ba..cff097a1169cf 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -1,5 +1,3 @@ -from datetime import datetime - import numpy as np import pytest @@ -468,9 +466,9 @@ def test_datetime_bin(conv): bins = [conv(v) for v in bin_data] result = Series(cut(data, bins=bins)) - if type(bins[0]) is datetime: + if type(bins[0]) is np.datetime64: # The bins have microsecond dtype -> so does result - expected = expected.astype("interval[datetime64[us]]") + expected = expected.astype("interval[datetime64[s]]") expected = expected.astype(CategoricalDtype(ordered=True)) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py index cad67a8db7eb8..5eaef85d75175 100644 --- a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py +++ b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py @@ -67,7 +67,7 @@ def test_tz_localize_ambiguous_bool(self, unit, tz): assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value def test_tz_localize_ambiguous(self): - ts = Timestamp("2014-11-02 01:00") + ts = Timestamp("2014-11-02 01:00").as_unit("s") ts_dst = ts.tz_localize("US/Eastern", ambiguous=True) ts_no_dst = ts.tz_localize("US/Eastern", ambiguous=False) @@ -114,25 +114,25 @@ def test_tz_localize_nonexistent(self, stamp, tz): "2015-03-29 02:00:00", "Europe/Warsaw", "2015-03-29 03:00:00", - "2015-03-29 01:59:59", + "2015-03-29 01:59:59.999999", ), # utc+1 -> utc+2 ( "2023-03-12 02:00:00", "America/Los_Angeles", "2023-03-12 03:00:00", - "2023-03-12 01:59:59", + "2023-03-12 01:59:59.999999", ), # utc-8 -> utc-7 ( "2023-03-26 01:00:00", "Europe/London", "2023-03-26 02:00:00", - "2023-03-26 00:59:59", + "2023-03-26 00:59:59.999999", ), # utc+0 -> utc+1 ( "2023-03-26 00:00:00", "Atlantic/Azores", "2023-03-26 01:00:00", - "2023-03-25 23:59:59", + "2023-03-25 23:59:59.999999", ), # utc-1 -> utc+0 ], ) @@ -191,13 +191,14 @@ def test_tz_localize_ambiguous_compat(self): # when the transition happens pytz = pytest.importorskip("pytz") naive = Timestamp("2013-10-27 01:00:00") + assert naive.unit == "us" pytz_zone = pytz.timezone("Europe/London") dateutil_zone = "dateutil/Europe/London" result_pytz = naive.tz_localize(pytz_zone, ambiguous=False) result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=False) assert result_pytz._value == result_dateutil._value - assert result_pytz._value == 1382835600 + assert result_pytz._value == 1_382_835_600 * 10**6 # fixed ambiguous behavior # see gh-14621, GH#45087 @@ -209,7 +210,7 @@ def test_tz_localize_ambiguous_compat(self): result_pytz = naive.tz_localize(pytz_zone, ambiguous=True) result_dateutil = naive.tz_localize(dateutil_zone, 
ambiguous=True) assert result_pytz._value == result_dateutil._value - assert result_pytz._value == 1382832000 + assert result_pytz._value == 1_382_832_000 * 10**6 # see gh-14621 assert str(result_pytz) == str(result_dateutil) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 5c89cb3a66c2f..e8268a075b67f 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -434,31 +434,31 @@ class TestTimestampResolutionInference: def test_construct_from_time_unit(self): # GH#54097 only passing a time component, no date ts = Timestamp("01:01:01.111") - assert ts.unit == "ms" + assert ts.unit == "us" def test_constructor_str_infer_reso(self): # non-iso8601 path # _parse_delimited_date path ts = Timestamp("01/30/2023") - assert ts.unit == "s" + assert ts.unit == "us" # _parse_dateabbr_string path ts = Timestamp("2015Q1") - assert ts.unit == "s" + assert ts.unit == "us" # dateutil_parse path ts = Timestamp("2016-01-01 1:30:01 PM") - assert ts.unit == "s" + assert ts.unit == "us" ts = Timestamp("2016 June 3 15:25:01.345") - assert ts.unit == "ms" + assert ts.unit == "us" ts = Timestamp("300-01-01") - assert ts.unit == "s" + assert ts.unit == "us" ts = Timestamp("300 June 1:30:01.300") - assert ts.unit == "ms" + assert ts.unit == "us" # dateutil path -> don't drop trailing zeros ts = Timestamp("01-01-2013T00:00:00.000000000+0000") @@ -474,10 +474,10 @@ def test_constructor_str_infer_reso(self): # GH#56208 minute reso through the ISO8601 path with tz offset ts = Timestamp("2020-01-01 00:00+00:00") - assert ts.unit == "s" + assert ts.unit == "us" ts = Timestamp("2020-01-01 00+00:00") - assert ts.unit == "s" + assert ts.unit == "us" @pytest.mark.parametrize("method", ["now", "today"]) def test_now_today_unit(self, method): @@ -514,10 +514,10 @@ def test_construct_from_string_invalid_raises(self): def test_constructor_from_iso8601_str_with_offset_reso(self): # GH#49737 ts = Timestamp("2016-01-01 04:05:06-01:00") - assert ts.unit == "s" + assert ts.unit == "us" ts = Timestamp("2016-01-01 04:05:06.000-01:00") - assert ts.unit == "ms" + assert ts.unit == "us" ts = Timestamp("2016-01-01 04:05:06.000000-01:00") assert ts.unit == "us" @@ -830,10 +830,10 @@ def test_out_of_bounds_string(self): Timestamp("2263-01-01").as_unit("ns") ts = Timestamp("2263-01-01") - assert ts.unit == "s" + assert ts.unit == "us" ts = Timestamp("1676-01-01") - assert ts.unit == "s" + assert ts.unit == "us" def test_barely_out_of_bounds(self): # GH#19529 @@ -883,7 +883,7 @@ def test_out_of_bounds_string_consistency(self, arg): Timestamp(arg).as_unit("ns") ts = Timestamp(arg) - assert ts.unit == "s" + assert ts.unit == "us" assert ts.year == ts.month == ts.day == 1 def test_min_valid(self): @@ -1086,14 +1086,15 @@ def test_non_nano_value(): msg = ( r"Cannot convert Timestamp to nanoseconds without overflow. 
" r"Use `.asm8.view\('i8'\)` to cast represent Timestamp in its " - r"own unit \(here, s\).$" + r"own unit \(here, us\).$" ) ts = Timestamp("0300-01-01") + assert ts.unit == "us" with pytest.raises(OverflowError, match=msg): ts.value # check that the suggested workaround actually works result = ts.asm8.view("i8") - assert result == -52700112000 + assert result == -52_700_112_000 * 10**6 @pytest.mark.parametrize("na_value", [None, np.nan, np.datetime64("NaT"), NaT, NA]) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 016b7db3e689a..86d49dc519b6a 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -364,9 +364,11 @@ def test_dt_round_tz_ambiguous(self, method): ) def test_dt_round_tz_nonexistent(self, method, ts_str, freq): # GH 23324 round near "spring forward" DST - ser = Series([pd.Timestamp(ts_str, tz="America/Chicago")]) + ser = Series([pd.Timestamp(ts_str, tz="America/Chicago").as_unit("s")]) result = getattr(ser.dt, method)(freq, nonexistent="shift_forward") - expected = Series([pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")]) + expected = Series( + [pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago").as_unit("s")] + ) tm.assert_series_equal(result, expected) result = getattr(ser.dt, method)(freq, nonexistent="NaT") diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index f894005296781..5c977de71ad84 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1483,7 +1483,7 @@ class TestCoercionDatetime64HigherReso(CoercionTest): def obj(self, exp_dtype): idx = date_range("2011-01-01", freq="D", periods=4, unit="s") if exp_dtype == "m8[ms]": - idx = idx - Timestamp("1970-01-01") + idx = idx - Timestamp("1970-01-01").as_unit("s") assert idx.dtype == "m8[s]" elif exp_dtype == "M8[ms, UTC]": idx = idx.tz_localize("UTC") @@ -1493,7 +1493,7 @@ def obj(self, exp_dtype): def val(self, exp_dtype): ts = Timestamp("2011-01-02 03:04:05.678").as_unit("ms") if exp_dtype == "m8[ms]": - return ts - Timestamp("1970-01-01") + return ts - Timestamp("1970-01-01").as_unit("s") elif exp_dtype == "M8[ms, UTC]": return ts.tz_localize("UTC") return ts diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index d39db924c6773..7de879681d74f 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ b/pandas/tests/series/methods/test_combine_first.py @@ -87,7 +87,9 @@ def test_combine_first_dt64_casting_deprecation(self, unit): rs = s0.combine_first(s1) xp = Series([datetime(2010, 1, 1), "2011"], dtype=f"datetime64[{unit}]") - + if unit in ["s", "ms"]: + # TODO: should _cast_pointwise_result attempt to preserve unit? 
+ xp = xp.dt.as_unit("us") tm.assert_series_equal(rs, xp) def test_combine_first_dt_tz_values(self, tz_naive_fixture): diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index f53d75df83124..e3d091132dca3 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -313,7 +313,7 @@ def test_datetime64_fillna(self): def test_datetime64_fillna_mismatched_reso_no_rounding(self, tz, scalar): # GH#56410 dti = date_range("2016-01-01", periods=3, unit="s", tz=tz) - item = Timestamp("2016-02-03 04:05:06.789", tz=tz) + item = Timestamp("2016-02-03 04:05:06.789", tz=tz).as_unit("ms") vec = date_range(item, periods=3, unit="ms") exp_dtype = "M8[ms]" if tz is None else "M8[ms, UTC]" @@ -340,8 +340,9 @@ def test_datetime64_fillna_mismatched_reso_no_rounding(self, tz, scalar): ) def test_timedelta64_fillna_mismatched_reso_no_rounding(self, scalar): # GH#56410 - tdi = date_range("2016-01-01", periods=3, unit="s") - Timestamp("1970-01-01") - item = Timestamp("2016-02-03 04:05:06.789") - Timestamp("1970-01-01") + ts = Timestamp("1970-01-01").as_unit("s") + tdi = date_range("2016-01-01", periods=3, unit="s") - ts + item = Timestamp("2016-02-03 04:05:06.789").as_unit("ms") - ts vec = timedelta_range(item, periods=3, unit="ms") expected = Series([item, tdi[1], tdi[2]], dtype="m8[ms]") @@ -388,7 +389,7 @@ def test_datetime64_tz_fillna(self, tz, unit): ) null_loc = Series([False, True, False, True]) - result = ser.fillna(Timestamp("2011-01-02 10:00")) + result = ser.fillna(Timestamp("2011-01-02 10:00").as_unit("s")) expected = Series( [ Timestamp("2011-01-01 10:00"), @@ -402,13 +403,13 @@ def test_datetime64_tz_fillna(self, tz, unit): # check s is not changed tm.assert_series_equal(isna(ser), null_loc) - result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz)) + result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz).as_unit("s")) expected = Series( [ - Timestamp("2011-01-01 10:00"), - Timestamp("2011-01-02 10:00", tz=tz), - Timestamp("2011-01-03 10:00"), - Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-01 10:00").as_unit("s"), + Timestamp("2011-01-02 10:00", tz=tz).as_unit("s"), + Timestamp("2011-01-03 10:00").as_unit("s"), + Timestamp("2011-01-02 10:00", tz=tz).as_unit("s"), ], ) tm.assert_series_equal(expected, result) @@ -417,9 +418,9 @@ def test_datetime64_tz_fillna(self, tz, unit): result = ser.fillna("AAA") expected = Series( [ - Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-01 10:00").as_unit("s"), "AAA", - Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-03 10:00").as_unit("s"), "AAA", ], dtype=object, @@ -429,23 +430,26 @@ def test_datetime64_tz_fillna(self, tz, unit): result = ser.fillna( { - 1: Timestamp("2011-01-02 10:00", tz=tz), - 3: Timestamp("2011-01-04 10:00"), + 1: Timestamp("2011-01-02 10:00", tz=tz).as_unit("s"), + 3: Timestamp("2011-01-04 10:00").as_unit("s"), } ) expected = Series( [ - Timestamp("2011-01-01 10:00"), - Timestamp("2011-01-02 10:00", tz=tz), - Timestamp("2011-01-03 10:00"), - Timestamp("2011-01-04 10:00"), + Timestamp("2011-01-01 10:00").as_unit("s"), + Timestamp("2011-01-02 10:00", tz=tz).as_unit("s"), + Timestamp("2011-01-03 10:00").as_unit("s"), + Timestamp("2011-01-04 10:00").as_unit("s"), ] ) tm.assert_series_equal(expected, result) tm.assert_series_equal(isna(ser), null_loc) result = ser.fillna( - {1: Timestamp("2011-01-02 10:00"), 3: Timestamp("2011-01-04 10:00")} + { + 1: Timestamp("2011-01-02 10:00").as_unit("s"), + 3: Timestamp("2011-01-04 
10:00").as_unit("s"), + } ) expected = Series( [ @@ -467,19 +471,19 @@ def test_datetime64_tz_fillna(self, tz, unit): assert ser.dtype == f"datetime64[{unit}, {tz}]" tm.assert_series_equal(isna(ser), null_loc) - result = ser.fillna(Timestamp("2011-01-02 10:00")) + result = ser.fillna(Timestamp("2011-01-02 10:00").as_unit("s")) expected = Series( [ - Timestamp("2011-01-01 10:00", tz=tz), - Timestamp("2011-01-02 10:00"), - Timestamp("2011-01-03 10:00", tz=tz), - Timestamp("2011-01-02 10:00"), + Timestamp("2011-01-01 10:00", tz=tz).as_unit("s"), + Timestamp("2011-01-02 10:00").as_unit("s"), + Timestamp("2011-01-03 10:00", tz=tz).as_unit("s"), + Timestamp("2011-01-02 10:00").as_unit("s"), ] ) tm.assert_series_equal(expected, result) tm.assert_series_equal(isna(ser), null_loc) - result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz)) + result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz).as_unit("s")) idx = DatetimeIndex( [ "2011-01-01 10:00", @@ -510,9 +514,9 @@ def test_datetime64_tz_fillna(self, tz, unit): result = ser.fillna("AAA") expected = Series( [ - Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2011-01-01 10:00", tz=tz).as_unit("s"), "AAA", - Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2011-01-03 10:00", tz=tz).as_unit("s"), "AAA", ], dtype=object, @@ -522,16 +526,16 @@ def test_datetime64_tz_fillna(self, tz, unit): result = ser.fillna( { - 1: Timestamp("2011-01-02 10:00", tz=tz), - 3: Timestamp("2011-01-04 10:00"), + 1: Timestamp("2011-01-02 10:00", tz=tz).as_unit("s"), + 3: Timestamp("2011-01-04 10:00").as_unit("s"), } ) expected = Series( [ - Timestamp("2011-01-01 10:00", tz=tz), - Timestamp("2011-01-02 10:00", tz=tz), - Timestamp("2011-01-03 10:00", tz=tz), - Timestamp("2011-01-04 10:00"), + Timestamp("2011-01-01 10:00", tz=tz).as_unit("s"), + Timestamp("2011-01-02 10:00", tz=tz).as_unit("s"), + Timestamp("2011-01-03 10:00", tz=tz).as_unit("s"), + Timestamp("2011-01-04 10:00").as_unit("s"), ] ) tm.assert_series_equal(expected, result) @@ -539,8 +543,8 @@ def test_datetime64_tz_fillna(self, tz, unit): result = ser.fillna( { - 1: Timestamp("2011-01-02 10:00", tz=tz), - 3: Timestamp("2011-01-04 10:00", tz=tz), + 1: Timestamp("2011-01-02 10:00", tz=tz).as_unit("s"), + 3: Timestamp("2011-01-04 10:00", tz=tz).as_unit("s"), } ) expected = Series( @@ -555,13 +559,13 @@ def test_datetime64_tz_fillna(self, tz, unit): tm.assert_series_equal(isna(ser), null_loc) # filling with a naive/other zone, coerce to object - result = ser.fillna(Timestamp("20130101")) + result = ser.fillna(Timestamp("20130101").as_unit("s")) expected = Series( [ - Timestamp("2011-01-01 10:00", tz=tz), - Timestamp("2013-01-01"), - Timestamp("2011-01-03 10:00", tz=tz), - Timestamp("2013-01-01"), + Timestamp("2011-01-01 10:00", tz=tz).as_unit("s"), + Timestamp("2013-01-01").as_unit("s"), + Timestamp("2011-01-03 10:00", tz=tz).as_unit("s"), + Timestamp("2013-01-01").as_unit("s"), ] ) tm.assert_series_equal(expected, result) @@ -569,13 +573,13 @@ def test_datetime64_tz_fillna(self, tz, unit): # pre-2.0 fillna with mixed tzs would cast to object, in 2.0 # it retains dtype. 
- result = ser.fillna(Timestamp("20130101", tz="US/Pacific")) + result = ser.fillna(Timestamp("20130101", tz="US/Pacific").as_unit("s")) expected = Series( [ - Timestamp("2011-01-01 10:00", tz=tz), - Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz), - Timestamp("2011-01-03 10:00", tz=tz), - Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz), + Timestamp("2011-01-01 10:00", tz=tz).as_unit("s"), + Timestamp("2013-01-01", tz="US/Pacific").as_unit("s").tz_convert(tz), + Timestamp("2011-01-03 10:00", tz=tz).as_unit("s"), + Timestamp("2013-01-01", tz="US/Pacific").as_unit("s").tz_convert(tz), ] ).dt.as_unit(unit) tm.assert_series_equal(expected, result) @@ -584,20 +588,20 @@ def test_datetime64_tz_fillna(self, tz, unit): def test_fillna_dt64tz_with_method(self): # with timezone # GH#15855 - ser = Series([Timestamp("2012-11-11 00:00:00+01:00"), NaT]) + ser = Series([Timestamp("2012-11-11 00:00:00+01:00").as_unit("s"), NaT]) exp = Series( [ - Timestamp("2012-11-11 00:00:00+01:00"), - Timestamp("2012-11-11 00:00:00+01:00"), + Timestamp("2012-11-11 00:00:00+01:00").as_unit("s"), + Timestamp("2012-11-11 00:00:00+01:00").as_unit("s"), ] ) tm.assert_series_equal(ser.ffill(), exp) - ser = Series([NaT, Timestamp("2012-11-11 00:00:00+01:00")]) + ser = Series([NaT, Timestamp("2012-11-11 00:00:00+01:00").as_unit("s")]) exp = Series( [ - Timestamp("2012-11-11 00:00:00+01:00"), - Timestamp("2012-11-11 00:00:00+01:00"), + Timestamp("2012-11-11 00:00:00+01:00").as_unit("s"), + Timestamp("2012-11-11 00:00:00+01:00").as_unit("s"), ] ) tm.assert_series_equal(ser.bfill(), exp) diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index 3e3eb36112680..d67d0b5235aba 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -32,7 +32,7 @@ def test_from_csv(self, datetime_series, string_series, temp_file): datetime_series.to_csv(path, header=False) ts = self.read_csv(path, parse_dates=True) expected = datetime_series.copy() - expected.index = expected.index.as_unit("s") + expected.index = expected.index.as_unit("us") tm.assert_series_equal(expected, ts, check_names=False) assert ts.name is None @@ -59,7 +59,6 @@ def test_from_csv(self, datetime_series, string_series, temp_file): series = self.read_csv(path, sep="|", parse_dates=True) check_series = Series({datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}) - check_series.index = check_series.index.as_unit("s") tm.assert_series_equal(check_series, series) series = self.read_csv(path, sep="|", parse_dates=False) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 018ae06562148..2f1e4ee9a620e 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1164,8 +1164,8 @@ def test_constructor_with_datetime_tz4(self): # inference ser = Series( [ - Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), - Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"), + Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific").as_unit("s"), + Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific").as_unit("s"), ] ) assert ser.dtype == "datetime64[s, US/Pacific]" @@ -1413,10 +1413,10 @@ def create_data(constructor): result_datetime = Series(data_datetime) result_Timestamp = Series(data_Timestamp) - tm.assert_series_equal(result_datetime64, expected) tm.assert_series_equal( - result_datetime, expected.set_axis(expected.index.as_unit("us")) + result_datetime64, 
expected.set_axis(expected.index.as_unit("s")) ) + tm.assert_series_equal(result_datetime, expected) tm.assert_series_equal(result_Timestamp, expected) def test_constructor_dict_tuple_indexer(self): diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index c9cda0c4153cb..67b1e8668e5f6 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -132,7 +132,7 @@ def test_to_datetime_format_YYYYMMDD_with_nat(self, cache): expected = Series( [Timestamp("19801222"), Timestamp("19801222")] + [Timestamp("19810105")] * 5, - dtype="M8[s]", + dtype="M8[us]", ) expected[2] = np.nan ser[2] = np.nan @@ -159,7 +159,7 @@ def test_to_datetime_format_YYYYMM_with_nat(self, cache): expected = Series( [Timestamp("19801201"), Timestamp("19801201")] + [Timestamp("19810101")] * 5, - dtype="M8[s]", + dtype="M8[us]", ) expected[2] = np.nan ser[2] = np.nan @@ -173,7 +173,7 @@ def test_to_datetime_format_YYYYMMDD_oob_for_ns(self, cache): result = to_datetime(ser, format="%Y%m%d", errors="raise", cache=cache) expected = Series( np.array(["2012-12-31", "2014-12-31", "9999-12-31"], dtype="M8[s]"), - dtype="M8[s]", + dtype="M8[us]", ) tm.assert_series_equal(result, expected) @@ -182,7 +182,7 @@ def test_to_datetime_format_YYYYMMDD_coercion(self, cache): # GH 7930 ser = Series([20121231, 20141231, 999999999999999999999999999991231]) result = to_datetime(ser, format="%Y%m%d", errors="coerce", cache=cache) - expected = Series(["20121231", "20141231", "NaT"], dtype="M8[s]") + expected = Series(["20121231", "20141231", "NaT"], dtype="M8[us]") tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -544,6 +544,25 @@ def test_to_datetime_parse_timezone_keeps_name(self): class TestToDatetime: + def test_to_datetime_mixed_string_resos(self): + # GH#62801 + vals = [ + "2016-01-01 01:02:03", + "2016-01-01 01:02:03.001", + "2016-01-01 01:02:03.001002", + "2016-01-01 01:02:03.001002003", + ] + expected = DatetimeIndex([Timestamp(x).as_unit("ns") for x in vals]) + + result1 = DatetimeIndex(vals) + tm.assert_index_equal(result1, expected) + + result2 = to_datetime(vals, format="ISO8601") + tm.assert_index_equal(result2, expected) + + result3 = to_datetime(vals, format="mixed") + tm.assert_index_equal(result3, expected) + def test_to_datetime_none(self): # GH#23055 assert to_datetime(None) is NaT @@ -596,7 +615,7 @@ def test_to_datetime_mixed_date_and_string(self, format): # https://github.com/pandas-dev/pandas/issues/50108 d1 = date(2020, 1, 2) res = to_datetime(["2020-01-01", d1], format=format) - expected = DatetimeIndex(["2020-01-01", "2020-01-02"], dtype="M8[s]") + expected = DatetimeIndex(["2020-01-01", "2020-01-02"], dtype="M8[us]") tm.assert_index_equal(res, expected) @pytest.mark.parametrize( @@ -659,7 +678,7 @@ def test_to_datetime_mixed_datetime_and_string_with_format( ts2 = args[1] result = to_datetime([ts1, ts2], format=fmt, utc=utc) if constructor is Timestamp: - expected = expected.as_unit("s") + expected = expected.as_unit("us") tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -731,7 +750,7 @@ def test_to_datetime_mixed_offsets_with_none_tz_utc_false_removed( "%Y-%m-%d %H:%M:%S%z", DatetimeIndex( ["2000-01-01 08:00:00+00:00", "2000-01-02 00:00:00+00:00", "NaT"], - dtype="datetime64[s, UTC]", + dtype="datetime64[us, UTC]", ), id="ISO8601, UTC", ), @@ -739,7 +758,7 @@ def test_to_datetime_mixed_offsets_with_none_tz_utc_false_removed( "%Y-%d-%m %H:%M:%S%z", DatetimeIndex( ["2000-01-01 08:00:00+00:00", 
"2000-02-01 00:00:00+00:00", "NaT"], - dtype="datetime64[s, UTC]", + dtype="datetime64[us, UTC]", ), id="non-ISO8601, UTC", ), @@ -1175,7 +1194,7 @@ def test_to_datetime_tz(self, cache): result = to_datetime(arr, cache=cache) expected = DatetimeIndex( ["2013-01-01 13:00:00", "2013-01-02 14:00:00"], tz="US/Pacific" - ).as_unit("s") + ) tm.assert_index_equal(result, expected) def test_to_datetime_tz_mixed(self, cache): @@ -1194,7 +1213,7 @@ def test_to_datetime_tz_mixed(self, cache): result = to_datetime(arr, cache=cache, errors="coerce") expected = DatetimeIndex( - ["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[s, US/Pacific]" + ["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[us, US/Pacific]" ) tm.assert_index_equal(result, expected) @@ -1495,8 +1514,9 @@ def test_to_datetime_cache_scalar(self): "s", ), ( - (None, Timestamp("2012-07-26")) + (NaT,) * start_caching_at, - (NaT, Timestamp("2012-07-26")) + (NaT,) * start_caching_at, + (None, Timestamp("2012-07-26").as_unit("s")) + + (NaT,) * start_caching_at, + (NaT, Timestamp("2012-07-26").as_unit("s")) + (NaT,) * start_caching_at, "s", ), ( @@ -1505,7 +1525,7 @@ def test_to_datetime_cache_scalar(self): + ("2012 July 26", Timestamp("2012-07-26")), (NaT,) * (start_caching_at + 1) + (Timestamp("2012-07-26"), Timestamp("2012-07-26")), - "s", + "us", ), ), ) @@ -1596,13 +1616,15 @@ def test_to_datetime_coerce_oob(self, string_arg, format, outofbounds): format.startswith("%B") ^ outofbounds.startswith("J") ): # the strings don't match the given format, so they raise and we coerce - expected = DatetimeIndex([datetime(2018, 3, 1), NaT], dtype="M8[s]") + expected = DatetimeIndex([datetime(2018, 3, 1), NaT], dtype="M8[us]") elif isinstance(outofbounds, datetime): expected = DatetimeIndex( [datetime(2018, 3, 1), outofbounds], dtype="M8[us]" ) else: - expected = DatetimeIndex([datetime(2018, 3, 1), outofbounds], dtype="M8[s]") + expected = DatetimeIndex( + [datetime(2018, 3, 1), outofbounds], dtype="M8[us]" + ) tm.assert_index_equal(result, expected) def test_to_datetime_malformed_no_raise(self): @@ -1662,7 +1684,7 @@ def test_iso_8601_strings_with_different_offsets_utc(self): result = to_datetime(ts_strings, utc=True) expected = DatetimeIndex( [Timestamp(2015, 11, 18, 10), Timestamp(2015, 11, 18, 10), NaT], tz="UTC" - ).as_unit("s") + ) tm.assert_index_equal(result, expected) def test_mixed_offsets_with_native_datetime_utc_false_raises(self): @@ -1688,7 +1710,7 @@ def test_non_iso_strings_with_tz_offset(self): result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2) expected = DatetimeIndex( [datetime(2018, 3, 1, 12, tzinfo=timezone(timedelta(minutes=240)))] * 2 - ).as_unit("s") + ) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -1711,7 +1733,7 @@ def test_to_datetime_with_format_out_of_bounds(self, dt_str): # GH 9107 res = to_datetime(dt_str, format="%Y%m%d") dtobj = datetime.strptime(dt_str, "%Y%m%d") - expected = Timestamp(dtobj).as_unit("s") + expected = Timestamp(dtobj) assert res == expected assert res.unit == expected.unit @@ -2232,7 +2254,7 @@ def test_dataframe_utc_true(self): df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) result = to_datetime(df, utc=True) expected = Series( - np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[s]") + np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[us]") ).dt.tz_localize("UTC") tm.assert_series_equal(result, expected) @@ -2436,9 +2458,7 @@ def test_to_datetime_with_space_in_series(self, cache): with pytest.raises(ValueError, 
match=msg): to_datetime(ser, errors="raise", cache=cache) result_coerce = to_datetime(ser, errors="coerce", cache=cache) - expected_coerce = Series( - [datetime(2006, 10, 18), datetime(2008, 10, 18), NaT] - ).dt.as_unit("s") + expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), NaT]) tm.assert_series_equal(result_coerce, expected_coerce) @td.skip_if_not_us_locale @@ -2559,7 +2579,7 @@ def test_string_na_nat_conversion(self, cache): strings = np.array(["1/1/2000", "1/2/2000", np.nan, "1/4/2000"], dtype=object) - expected = np.empty(4, dtype="M8[s]") + expected = np.empty(4, dtype="M8[us]") for i, val in enumerate(strings): if isna(val): expected[i] = iNaT @@ -2604,7 +2624,7 @@ def test_string_na_nat_conversion_with_name(self, cache): result = to_datetime(series, cache=cache) dresult = to_datetime(dseries, cache=cache) - expected = Series(np.empty(5, dtype="M8[s]"), index=idx) + expected = Series(np.empty(5, dtype="M8[us]"), index=idx) for i in range(5): x = series.iloc[i] if isna(x): @@ -2644,7 +2664,7 @@ def test_dayfirst(self, cache): arr = ["10/02/2014", "11/02/2014", "12/02/2014"] expected = DatetimeIndex( [datetime(2014, 2, 10), datetime(2014, 2, 11), datetime(2014, 2, 12)] - ).as_unit("s") + ) idx1 = DatetimeIndex(arr, dayfirst=True) idx2 = DatetimeIndex(np.array(arr), dayfirst=True) idx3 = to_datetime(arr, dayfirst=True, cache=cache) @@ -2668,7 +2688,7 @@ def test_dayfirst_warnings_valid_input(self): # CASE 1: valid input arr = ["31/12/2014", "10/03/2011"] expected = DatetimeIndex( - ["2014-12-31", "2011-03-10"], dtype="datetime64[s]", freq=None + ["2014-12-31", "2011-03-10"], dtype="datetime64[us]", freq=None ) # A. dayfirst arg correct, no warning @@ -2773,7 +2793,7 @@ def test_to_datetime_consistent_format(self, cache): ser = Series(np.array(data)) result = to_datetime(ser, cache=cache) expected = Series( - ["2011-01-01", "2011-02-01", "2011-03-01"], dtype="datetime64[s]" + ["2011-01-01", "2011-02-01", "2011-03-01"], dtype="datetime64[us]" ) tm.assert_series_equal(result, expected) @@ -2785,7 +2805,9 @@ def test_to_datetime_series_with_nans(self, cache): ) ) result = to_datetime(ser, cache=cache) - expected = Series(["2011-01-01", NaT, "2011-01-03", NaT], dtype="datetime64[s]") + expected = Series( + ["2011-01-01", NaT, "2011-01-03", NaT], dtype="datetime64[us]" + ) tm.assert_series_equal(result, expected) def test_to_datetime_series_start_with_nans(self, cache): @@ -2804,7 +2826,7 @@ def test_to_datetime_series_start_with_nans(self, cache): result = to_datetime(ser, cache=cache) expected = Series( - [NaT, NaT, "2011-01-01", "2011-01-02", "2011-01-03"], dtype="datetime64[s]" + [NaT, NaT, "2011-01-01", "2011-01-02", "2011-01-03"], dtype="datetime64[us]" ) tm.assert_series_equal(result, expected) @@ -2818,7 +2840,6 @@ def test_infer_datetime_format_tz_name(self, tz_name, offset): result = to_datetime(ser) tz = timezone(timedelta(minutes=offset)) expected = Series([Timestamp("2019-02-02 08:07:13").tz_localize(tz)]) - expected = expected.dt.as_unit("s") tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -2984,10 +3005,7 @@ def test_parsers(self, date_str, expected, cache): reso = { "nanosecond": "ns", - "microsecond": "us", - "millisecond": "ms", - "second": "s", - }.get(reso_attrname, "s") + }.get(reso_attrname, "us") result2 = to_datetime(date_str, yearfirst=yearfirst) result3 = to_datetime([date_str], yearfirst=yearfirst) # result5 is used below @@ -3432,7 +3450,7 @@ def test_empty_string_datetime(errors, args, format): # coerce empty string 
to pd.NaT result = to_datetime(td, format=format, errors=errors) - expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[s]") + expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[us]") tm.assert_series_equal(expected, result) @@ -3617,7 +3635,7 @@ def test_to_datetime_with_empty_str_utc_false_format_mixed(): # GH 50887 vals = ["2020-01-01 00:00+00:00", ""] result = to_datetime(vals, format="mixed") - expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype="M8[s, UTC]") + expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype="M8[us, UTC]") tm.assert_index_equal(result, expected) # Check that a couple of other similar paths work the same way diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index fc0000553049e..d244c9bfc72d8 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -111,7 +111,7 @@ def test_array_to_datetime_with_tz_resolution(self): tz = tzoffset("custom", 3600) vals = np.array(["2016-01-01 02:03:04.567", NaT], dtype=object) res = tslib.array_to_datetime_with_tz(vals, tz, False, False, creso_infer) - assert res.dtype == "M8[ms]" + assert res.dtype == "M8[us]" vals2 = np.array([datetime(2016, 1, 1, 2, 3, 4), NaT], dtype=object) res2 = tslib.array_to_datetime_with_tz(vals2, tz, False, False, creso_infer) @@ -155,7 +155,7 @@ def test_parsing_valid_dates(data, expected): arr = np.array(data, dtype=object) result, _ = tslib.array_to_datetime(arr) - expected = np.array(expected, dtype="M8[s]") + expected = np.array(expected, dtype="M8[us]") tm.assert_numpy_array_equal(result, expected) @@ -211,8 +211,8 @@ def test_parsing_different_timezone_offsets(): [ (date(1000, 1, 1), "s"), (datetime(1000, 1, 1), "us"), - ("1000-01-01", "s"), - ("Jan 1, 1000", "s"), + ("1000-01-01", "us"), + ("Jan 1, 1000", "us"), (np.datetime64("1000-01-01"), "s"), ], ) @@ -235,7 +235,7 @@ def test_coerce_outside_ns_bounds_one_valid(): result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["1000-01-01T00:00:00.000000000", "2000-01-01T00:00:00.000000000"] - expected = np.array(expected, dtype="M8[s]") + expected = np.array(expected, dtype="M8[us]") tm.assert_numpy_array_equal(result, expected) @@ -245,13 +245,13 @@ def test_coerce_of_invalid_datetimes(): # With coercing, the invalid dates becomes iNaT result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] - tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[s]")) + tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[us]")) # With coercing, the invalid dates becomes iNaT result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] - tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[s]")) + tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[us]")) def test_to_datetime_barely_out_of_bounds(): diff --git a/pandas/tests/tslibs/test_strptime.py b/pandas/tests/tslibs/test_strptime.py index d726006b03f6d..c63d3dbd9f5c7 100644 --- a/pandas/tests/tslibs/test_strptime.py +++ b/pandas/tests/tslibs/test_strptime.py @@ -32,11 +32,12 @@ def test_array_strptime_resolution_all_nat(self): @pytest.mark.parametrize("tz", [None, timezone.utc]) def test_array_strptime_resolution_inference_homogeneous_strings(self, tz): dt = datetime(2016, 1, 2, 3, 4, 5, 678900, tzinfo=tz) + dt0 = dt.replace(microsecond=0) fmt = "%Y-%m-%d 
%H:%M:%S" dtstr = dt.strftime(fmt) arr = np.array([dtstr] * 3, dtype=object) - expected = np.array([dt.replace(tzinfo=None)] * 3, dtype="M8[s]") + expected = np.array([dt0.replace(tzinfo=None)] * 3, dtype="M8[us]") res, _ = array_strptime(arr, fmt=fmt, utc=False, creso=creso_infer) tm.assert_numpy_array_equal(res, expected) @@ -97,14 +98,14 @@ def test_array_strptime_resolution_todaynow(self): def test_array_strptime_str_outside_nano_range(self): vals = np.array(["2401-09-15"], dtype=object) - expected = np.array(["2401-09-15"], dtype="M8[s]") + expected = np.array(["2401-09-15"], dtype="M8[us]") fmt = "ISO8601" res, _ = array_strptime(vals, fmt=fmt, creso=creso_infer) tm.assert_numpy_array_equal(res, expected) # non-iso -> different path vals2 = np.array(["Sep 15, 2401"], dtype=object) - expected2 = np.array(["2401-09-15"], dtype="M8[s]") + expected2 = np.array(["2401-09-15"], dtype="M8[us]") fmt2 = "%b %d, %Y" res2, _ = array_strptime(vals2, fmt=fmt2, creso=creso_infer) tm.assert_numpy_array_equal(res2, expected2) diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py index 60bbcf08ce8e7..d3bce7b4bbf65 100644 --- a/pandas/tests/tslibs/test_timezones.py +++ b/pandas/tests/tslibs/test_timezones.py @@ -52,12 +52,12 @@ def test_tzlocal_offset(): # see gh-13583 # # Get offset using normal datetime for test. - ts = Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()) + ts = Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()).as_unit("s") offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) offset = offset.total_seconds() - assert ts._value + offset == Timestamp("2011-01-01")._value + assert ts._value + offset == Timestamp("2011-01-01").as_unit("s")._value def test_tzlocal_is_not_utc():