diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 6121f2097a2f1..40ef12babb000 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1192,6 +1192,7 @@ MultiIndex
 I/O
 ^^^
 - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
+- Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``timedelta64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`63239`)
 - Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
 - Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`)
diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py
index ee6ac6584569e..ceac6fe9296df 100644
--- a/pandas/core/computation/pytables.py
+++ b/pandas/core/computation/pytables.py
@@ -13,6 +13,7 @@
     Any,
     ClassVar,
     Self,
+    cast,
 )
 
 import numpy as np
@@ -44,7 +45,10 @@
 )
 
 if TYPE_CHECKING:
-    from pandas._typing import npt
+    from pandas._typing import (
+        TimeUnit,
+        npt,
+    )
 
 
 class PyTablesScope(_scope.Scope):
@@ -225,15 +229,19 @@ def stringify(value):
             if conv_val.tz is not None:
                 conv_val = conv_val.tz_convert("UTC")
             return TermValue(conv_val, conv_val._value, kind)
-        elif kind in ("timedelta64", "timedelta"):
+        elif kind.startswith("timedelta"):
+            unit = "ns"
+            if "[" in kind:
+                unit = cast("TimeUnit", kind.split("[")[-1][:-1])
             if isinstance(conv_val, str):
                 conv_val = Timedelta(conv_val)
             elif lib.is_integer(conv_val) or lib.is_float(conv_val):
                 conv_val = Timedelta(conv_val, unit="s")
             else:
                 conv_val = Timedelta(conv_val)
-            conv_val = conv_val.as_unit("ns")._value
+            conv_val = conv_val.as_unit(unit)._value
             return TermValue(int(conv_val), conv_val, kind)
+
         elif meta == "category":
             metadata = extract_array(self.metadata, extract_numpy=True)
             result: npt.NDArray[np.intp] | np.intp | int
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 6d6efdb6b5b03..8d84bef91bb03 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2702,8 +2702,12 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
             # recreate with tz if indicated
             converted = _set_tz(converted, tz, dtype)
 
-        elif dtype == "timedelta64":
-            converted = np.asarray(converted, dtype="m8[ns]")
+        elif dtype.startswith("timedelta64"):
+            if dtype == "timedelta64":
+                # from before we started storing timedelta64 unit
+                converted = np.asarray(converted, dtype="m8[ns]")
+            else:
+                converted = np.asarray(converted, dtype=dtype)
         elif dtype == "date":
             try:
                 converted = np.asarray(
@@ -3086,8 +3090,13 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None
                 tz = getattr(attrs, "tz", None)
                 ret = _set_tz(ret, tz, dtype)
 
-            elif dtype == "timedelta64":
-                ret = np.asarray(ret, dtype="m8[ns]")
+            elif dtype and dtype.startswith("timedelta64"):
+                if dtype == "timedelta64":
+                    # This was written back before we started writing
+                    # timedelta64 units
+                    ret = np.asarray(ret, dtype="m8[ns]")
+                else:
+                    ret = np.asarray(ret, dtype=dtype)
 
         if transposed:
             return ret.T
@@ -3324,7 +3333,7 @@ def write_array(
             node._v_attrs.value_type = f"datetime64[{value.dtype.unit}]"
         elif lib.is_np_dtype(value.dtype, "m"):
             self._handle.create_array(self.group, key, value.view("i8"))
-            getattr(self.group, key)._v_attrs.value_type = "timedelta64"
+            getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
         elif isinstance(value, BaseStringArray):
             vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom())
             vlarr.append(value.to_numpy())
@@ -5175,8 +5184,12 @@ def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray
             index = DatetimeIndex(data)
         else:
             index = DatetimeIndex(data.view(kind))
-    elif kind == "timedelta64":
-        index = TimedeltaIndex(data)
+    elif kind.startswith("timedelta64"):
+        if kind == "timedelta64":
+            # created before we stored resolution information
+            index = TimedeltaIndex(data)
+        else:
+            index = TimedeltaIndex(data.view(kind))
     elif kind == "date":
         try:
             index = np.asarray([date.fromordinal(v) for v in data], dtype=object)
@@ -5413,7 +5426,7 @@ def _dtype_to_kind(dtype_str: str) -> str:
     elif dtype_str.startswith("datetime64"):
         kind = dtype_str
     elif dtype_str.startswith("timedelta"):
-        kind = "timedelta64"
+        kind = dtype_str
     elif dtype_str.startswith("bool"):
         kind = "bool"
     elif dtype_str.startswith("category"):
diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py
index 1cb8162a402d8..2f525b1f6897c 100644
--- a/pandas/tests/io/pytables/test_append.py
+++ b/pandas/tests/io/pytables/test_append.py
@@ -848,7 +848,7 @@ def test_append_raise(tmp_path, using_infer_string):
         store.append("df", df)
 
 
-def test_append_with_timedelta(tmp_path):
+def test_append_with_timedelta(tmp_path, unit):
     # GH 3577
     # append timedelta
 
@@ -860,6 +860,7 @@ def test_append_with_timedelta(tmp_path):
         }
     )
     df["C"] = df["A"] - df["B"]
+    df["C"] = df["C"].astype(f"m8[{unit}]")
     df.loc[3:5, "C"] = np.nan
 
     path = tmp_path / "test_append_with_timedelta.h5"
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index 8f814ed3b05ff..d11495902f76c 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -1017,11 +1017,12 @@ def test_duplicate_column_name(tmp_path, setup_path):
     assert other.equals(df)
 
 
-@pytest.mark.xfail(reason="non-nano TimedeltaIndex does not round-trip")
-def test_preserve_timedeltaindex_type(setup_path):
+def test_preserve_timedeltaindex_type(setup_path, unit):
     # GH9635
    df = DataFrame(np.random.default_rng(2).normal(size=(10, 5)))
-    df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example")
+    df.index = timedelta_range(
+        start="0s", periods=10, freq="1s", name="example", unit=unit
+    )
 
     with ensure_clean_store(setup_path) as store:
         store["df"] = df
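
For reviewers, a minimal round-trip sketch of the behavior this patch targets (not part of the diff). It assumes a pandas build that includes these changes plus the optional PyTables ("tables") dependency; the file name and the choice of millisecond resolution are illustrative only.

    import numpy as np
    import pandas as pd

    # Build a frame with a non-nanosecond timedelta64 column.
    df = pd.DataFrame(
        {"td": pd.timedelta_range(start="1s", periods=3, freq="1s", unit="ms")}
    )

    df.to_hdf("td_roundtrip.h5", key="df", mode="w")
    result = pd.read_hdf("td_roundtrip.h5", "df")

    # Previously the stored integers were read back at nanosecond resolution;
    # with this change the millisecond unit survives the HDF5 round trip.
    assert result["td"].dtype == np.dtype("m8[ms]")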