From cc08de51394fce7bfbca04ac61cd8f14d0f7d6af Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 30 Nov 2025 16:30:42 -0800 Subject: [PATCH 1/5] BUG: pytables with non-nano timedelta64 --- pandas/core/computation/pytables.py | 5 ++++- pandas/io/pytables.py | 29 +++++++++++++++++++------- pandas/tests/io/pytables/test_store.py | 6 ++++-- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index ee6ac6584569e..ab07322e35f20 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -225,7 +225,9 @@ def stringify(value): if conv_val.tz is not None: conv_val = conv_val.tz_convert("UTC") return TermValue(conv_val, conv_val._value, kind) - elif kind in ("timedelta64", "timedelta"): + elif kind in ("timedelta64", "timedelta", "timedelta64[ns]"): + # TODO: other timedelta64 units? 2025-11-30 only + # test_append_with_timedelta gets here if isinstance(conv_val, str): conv_val = Timedelta(conv_val) elif lib.is_integer(conv_val) or lib.is_float(conv_val): @@ -234,6 +236,7 @@ def stringify(value): conv_val = Timedelta(conv_val) conv_val = conv_val.as_unit("ns")._value return TermValue(int(conv_val), conv_val, kind) + elif meta == "category": metadata = extract_array(self.metadata, extract_numpy=True) result: npt.NDArray[np.intp] | np.intp | int diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 6d6efdb6b5b03..8d84bef91bb03 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2702,8 +2702,12 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): # recreate with tz if indicated converted = _set_tz(converted, tz, dtype) - elif dtype == "timedelta64": - converted = np.asarray(converted, dtype="m8[ns]") + elif dtype.startswith("timedelta64"): + if dtype == "timedelta64": + # from before we started storing timedelta64 unit + converted = np.asarray(converted, dtype="m8[ns]") + else: + converted = np.asarray(converted, dtype=dtype) elif dtype == "date": try: converted = np.asarray( @@ -3086,8 +3090,13 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None tz = getattr(attrs, "tz", None) ret = _set_tz(ret, tz, dtype) - elif dtype == "timedelta64": - ret = np.asarray(ret, dtype="m8[ns]") + elif dtype and dtype.startswith("timedelta64"): + if dtype == "timedelta64": + # This was written back before we started writing + # timedelta64 units + ret = np.asarray(ret, dtype="m8[ns]") + else: + ret = np.asarray(ret, dtype=dtype) if transposed: return ret.T @@ -3324,7 +3333,7 @@ def write_array( node._v_attrs.value_type = f"datetime64[{value.dtype.unit}]" elif lib.is_np_dtype(value.dtype, "m"): self._handle.create_array(self.group, key, value.view("i8")) - getattr(self.group, key)._v_attrs.value_type = "timedelta64" + getattr(self.group, key)._v_attrs.value_type = str(value.dtype) elif isinstance(value, BaseStringArray): vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom()) vlarr.append(value.to_numpy()) @@ -5175,8 +5184,12 @@ def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray index = DatetimeIndex(data) else: index = DatetimeIndex(data.view(kind)) - elif kind == "timedelta64": - index = TimedeltaIndex(data) + elif kind.startswith("timedelta64"): + if kind == "timedelta64": + # created before we stored resolution information + index = TimedeltaIndex(data) + else: + index = TimedeltaIndex(data.view(kind)) elif kind == "date": try: index = np.asarray([date.fromordinal(v) for v in data], dtype=object) @@ -5413,7 +5426,7 @@ def _dtype_to_kind(dtype_str: str) -> str: elif dtype_str.startswith("datetime64"): kind = dtype_str elif dtype_str.startswith("timedelta"): - kind = "timedelta64" + kind = dtype_str elif dtype_str.startswith("bool"): kind = "bool" elif dtype_str.startswith("category"): diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index a80c7ea59c2b7..d11495902f76c 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -1017,10 +1017,12 @@ def test_duplicate_column_name(tmp_path, setup_path): assert other.equals(df) -def test_preserve_timedeltaindex_type(setup_path): +def test_preserve_timedeltaindex_type(setup_path, unit): # GH9635 df = DataFrame(np.random.default_rng(2).normal(size=(10, 5))) - df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example") + df.index = timedelta_range( + start="0s", periods=10, freq="1s", name="example", unit=unit + ) with ensure_clean_store(setup_path) as store: store["df"] = df From 202e3c43d0d1438fa0be3f4beed16ea8ec88dc99 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 30 Nov 2025 19:14:45 -0800 Subject: [PATCH 2/5] Fix doc example --- pandas/core/computation/pytables.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index ab07322e35f20..9fca73f4c1174 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -225,16 +225,19 @@ def stringify(value): if conv_val.tz is not None: conv_val = conv_val.tz_convert("UTC") return TermValue(conv_val, conv_val._value, kind) - elif kind in ("timedelta64", "timedelta", "timedelta64[ns]"): + elif kind.startswith("timedelta"): # TODO: other timedelta64 units? 2025-11-30 only # test_append_with_timedelta gets here + unit = "ns" + if "[" in kind: + unit = kind[-3:-1] if isinstance(conv_val, str): conv_val = Timedelta(conv_val) elif lib.is_integer(conv_val) or lib.is_float(conv_val): conv_val = Timedelta(conv_val, unit="s") else: conv_val = Timedelta(conv_val) - conv_val = conv_val.as_unit("ns")._value + conv_val = conv_val.as_unit(unit)._value return TermValue(int(conv_val), conv_val, kind) elif meta == "category": From 55bb9afcfdea3e56254929e30b2a79d03fe46d71 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 30 Nov 2025 22:13:36 -0800 Subject: [PATCH 3/5] mypy fixup --- pandas/core/computation/pytables.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 9fca73f4c1174..8ed4505ff9919 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -13,6 +13,7 @@ Any, ClassVar, Self, + cast, ) import numpy as np @@ -44,7 +45,10 @@ ) if TYPE_CHECKING: - from pandas._typing import npt + from pandas._typing import ( + TimeUnit, + npt, + ) class PyTablesScope(_scope.Scope): @@ -230,7 +234,7 @@ def stringify(value): # test_append_with_timedelta gets here unit = "ns" if "[" in kind: - unit = kind[-3:-1] + unit = cast("TimeUnit", kind[-3:-1]) if isinstance(conv_val, str): conv_val = Timedelta(conv_val) elif lib.is_integer(conv_val) or lib.is_float(conv_val): From 34f45da9dd67c7ccb8f0644e100da364ce7dc5dd Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 Dec 2025 07:31:22 -0800 Subject: [PATCH 4/5] Whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5db05142aba98..509397ed2f1ff 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1190,6 +1190,7 @@ MultiIndex I/O ^^^ - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`) +- Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``timedelta64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`63239`) - Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`) - Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`) From de5de0021a31248117cc1f90f1efce3e6a0d51ca Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 Dec 2025 07:34:06 -0800 Subject: [PATCH 5/5] parametrize test --- pandas/core/computation/pytables.py | 4 +--- pandas/tests/io/pytables/test_append.py | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 8ed4505ff9919..ceac6fe9296df 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -230,11 +230,9 @@ def stringify(value): conv_val = conv_val.tz_convert("UTC") return TermValue(conv_val, conv_val._value, kind) elif kind.startswith("timedelta"): - # TODO: other timedelta64 units? 2025-11-30 only - # test_append_with_timedelta gets here unit = "ns" if "[" in kind: - unit = cast("TimeUnit", kind[-3:-1]) + unit = cast("TimeUnit", kind.split("[")[-1][:-1]) if isinstance(conv_val, str): conv_val = Timedelta(conv_val) elif lib.is_integer(conv_val) or lib.is_float(conv_val): diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 1cb8162a402d8..2f525b1f6897c 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -848,7 +848,7 @@ def test_append_raise(tmp_path, using_infer_string): store.append("df", df) -def test_append_with_timedelta(tmp_path): +def test_append_with_timedelta(tmp_path, unit): # GH 3577 # append timedelta @@ -860,6 +860,7 @@ def test_append_with_timedelta(tmp_path): } ) df["C"] = df["A"] - df["B"] + df["C"] = df["C"].astype(f"m8[{unit}]") df.loc[3:5, "C"] = np.nan path = tmp_path / "test_append_with_timedelta.h5"