Merge branch 'main' into api-timedelta-constructor

jbrockmendel · jbrockmendel · commit 2eb7a524ed47 · 2025-12-02T07:28:26.000-08:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -552,29 +552,55 @@ small behavior differences as collateral:
 Changed treatment of NaN values in pyarrow and numpy-nullable floating dtypes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Previously, when dealing with a nullable dtype (e.g. ``Float64Dtype`` or ``int64[pyarrow]``), ``NaN`` was treated as interchangeable with :class:`NA` in some circumstances but not others. This was done to make adoption easier, but caused some confusion (:issue:`32265`). In 3.0, an option ``"mode.nan_is_na"`` (default ``True``) controls whether to treat ``NaN`` as equivalent to :class:`NA`.
+Previously, when dealing with a nullable dtype (e.g. ``Float64Dtype`` or ``int64[pyarrow]``),
+``NaN`` was treated as interchangeable with :class:`NA` in some circumstances but not others.
+This was done to make adoption easier, but caused some confusion (:issue:`32265`).
+In 3.0, this behaviour is made consistent: by default, ``NaN`` is treated as
+equivalent to :class:`NA` in all cases.
 
-With ``pd.set_option("mode.nan_is_na", True)`` (again, this is the default), ``NaN`` can be passed to constructors, ``__setitem__``, ``__contains__`` and be treated the same as :class:`NA`. The only change users will see is that arithmetic and ``np.ufunc`` operations that previously introduced ``NaN`` entries produce :class:`NA` entries instead:
+By default, ``NaN`` can be passed to constructors, ``__setitem__``, and
+``__contains__``, and will be treated the same as :class:`NA`. The only change
+users will see is that arithmetic and ``np.ufunc`` operations that previously
+introduced ``NaN`` entries now produce :class:`NA` entries instead.
 
 *Old behavior:*
 
 .. code-block:: ipython
 
-    In [2]: ser = pd.Series([0, None], dtype=pd.Float64Dtype())
+    # NaN in input gets converted to NA
+    In [1]: ser = pd.Series([0, np.nan], dtype=pd.Float64Dtype())
+    In [2]: ser
+    Out[2]:
+    0     0.0
+    1    <NA>
+    dtype: Float64
+    # NaN produced by arithmetic (0/0) remained NaN
     In [3]: ser / 0
     Out[3]:
     0     NaN
     1    <NA>
     dtype: Float64
+    # the NaN value was not considered missing
+    In [4]: (ser / 0).isna()
+    Out[4]:
+    0    False
+    1     True
+    dtype: bool
 
 *New behavior:*
 
 .. ipython:: python
 
-    ser = pd.Series([0, None], dtype=pd.Float64Dtype())
+    ser = pd.Series([0, np.nan], dtype=pd.Float64Dtype())
+    ser
     ser / 0
+    (ser / 0).isna()
 
-By contrast, with ``pd.set_option("mode.nan_is_na", False)``, ``NaN`` is always considered distinct and specifically as a floating-point value, so cannot be used with integer dtypes:
+In the future, the intention is to treat ``NaN`` and :class:`NA` as distinct
+values. An option to opt in to this behaviour is added in 3.0:
+``pd.options.future.distinguish_nan_and_na``. When enabled, ``NaN`` is always
+treated as distinct from :class:`NA`, and specifically as a floating-point value.
+As a consequence, it cannot be used with integer dtypes.
 
 *Old behavior:*
 
@@ -588,13 +614,21 @@ By contrast, with ``pd.set_option("mode.nan_is_na", False)``, ``NaN`` is always
 
 .. ipython:: python
 
-    pd.set_option("mode.nan_is_na", False)
-    ser = pd.Series([1, np.nan], dtype=pd.Float64Dtype())
-    ser[1]
+    with pd.option_context("future.distinguish_nan_and_na", True):
+        ser = pd.Series([1, np.nan], dtype=pd.Float64Dtype())
+        print(ser[1])
+
+If we had passed ``pd.Int64Dtype()`` or ``"int64[pyarrow]"`` for the dtype in
+the latter example, this would raise, as a float ``NaN`` cannot be held by an
+integer dtype.
 
-If we had passed ``pd.Int64Dtype()`` or ``"int64[pyarrow]"`` for the dtype in the latter example, this would raise, as a float ``NaN`` cannot be held by an integer dtype.
+With ``"future.distinguish_nan_and_na"`` enabled, ``ser.to_numpy()`` (and
+``frame.values`` and ``np.asarray(obj)``) will convert to ``object`` dtype if
+:class:`NA` entries are present, whereas previously the :class:`NA` entries
+were coerced to ``NaN``.  To retain a float numpy dtype, explicitly pass
+``na_value=np.nan`` to :meth:`Series.to_numpy`.
 
-With ``"mode.nan_is_na"`` set to ``False``, ``ser.to_numpy()`` (and ``frame.values`` and ``np.asarray(obj)``) will convert to ``object`` dtype if :class:`NA` entries are present, where before they would coerce to ``NaN``.  To retain a float numpy dtype, explicitly pass ``na_value=np.nan`` to :meth:`Series.to_numpy`.
+Note that the option is experimental and subject to change in future releases.
 
 The ``__module__`` attribute now points to public modules
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1192,6 +1226,7 @@ MultiIndex
 I/O
 ^^^
 - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
+- Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``timedelta64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`63239`)
 - Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits
   ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
 - Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`)
@@ -1249,6 +1284,7 @@ Plotting
 - Bug in :meth:`Series.plot` preventing a line and bar from being aligned on the same plot (:issue:`61161`)
 - Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`)
 - Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`)
+- Bug in plotting with a :class:`TimedeltaIndex` with non-nanosecond resolution displaying incorrect labels (:issue:`63237`)
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py
@@ -36,5 +36,5 @@ def using_string_dtype() -> bool:
 
 
 def is_nan_na() -> bool:
-    _mode_options = _global_config["mode"]
-    return _mode_options["nan_is_na"]
+    _mode_options = _global_config["future"]
+    return not _mode_options["distinguish_nan_and_na"]
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -2127,5 +2127,5 @@ def monkeysession():
 @pytest.fixture(params=[True, False])
 def using_nan_is_na(request):
     opt = request.param
-    with pd.option_context("mode.nan_is_na", opt):
+    with pd.option_context("future.distinguish_nan_and_na", not opt):
         yield opt
diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py
@@ -13,6 +13,7 @@
     Any,
     ClassVar,
     Self,
+    cast,
 )
 
 import numpy as np
@@ -44,7 +45,10 @@
 )
 
 if TYPE_CHECKING:
-    from pandas._typing import npt
+    from pandas._typing import (
+        TimeUnit,
+        npt,
+    )
 
 
 class PyTablesScope(_scope.Scope):
@@ -225,15 +229,19 @@ def stringify(value):
             if conv_val.tz is not None:
                 conv_val = conv_val.tz_convert("UTC")
             return TermValue(conv_val, conv_val._value, kind)
-        elif kind in ("timedelta64", "timedelta"):
+        elif kind.startswith("timedelta"):
+            unit = "ns"
+            if "[" in kind:
+                unit = cast("TimeUnit", kind.split("[")[-1][:-1])
             if isinstance(conv_val, str):
                 conv_val = Timedelta(conv_val)
             elif lib.is_integer(conv_val) or lib.is_float(conv_val):
                 conv_val = Timedelta(conv_val, unit="s")
             else:
                 conv_val = Timedelta(conv_val)
-            conv_val = conv_val.as_unit("ns")._value
+            conv_val = conv_val.as_unit(unit)._value
             return TermValue(int(conv_val), conv_val, kind)
+
         elif meta == "category":
             metadata = extract_array(self.metadata, extract_numpy=True)
             result: npt.NDArray[np.intp] | np.intp | int
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -428,15 +428,6 @@ def is_terminal() -> bool:
         validator=is_one_of_factory([True, False, "warn"]),
     )
 
-    cf.register_option(
-        "nan_is_na",
-        os.environ.get("PANDAS_NAN_IS_NA", "1") == "1",
-        "Whether to treat NaN entries as interchangeable with pd.NA in "
-        "numpy-nullable and pyarrow float dtypes. See discussion in "
-        "https://github.com/pandas-dev/pandas/issues/32265",
-        validator=is_one_of_factory([True, False]),
-    )
-
 
 # user warnings
 chained_assignment = """
@@ -899,6 +890,18 @@ def register_converter_cb(key: str) -> None:
         validator=is_one_of_factory([True, False]),
     )
 
+    cf.register_option(
+        "distinguish_nan_and_na",
+        os.environ.get("PANDAS_FUTURE_DISTINGUISH_NAN_AND_NA", "0") == "1",
+        "Whether to treat NaN entries as distinct from pd.NA in "
+        "numpy-nullable and pyarrow float dtypes. By default treats both "
+        "interchangeable as missing values (NaN will be coerced to NA). "
+        "See discussion in "
+        "https://github.com/pandas-dev/pandas/issues/32265",
+        validator=is_one_of_factory([True, False]),
+    )
+
+
 # GH#59502
 cf.deprecate_option("future.no_silent_downcasting", Pandas4Warning)
 cf.deprecate_option(
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
@@ -997,7 +997,7 @@ def _read_ujson(self) -> DataFrame | Series:
         else:
             obj = self._get_object_parser(self.data)
         if self.dtype_backend is not lib.no_default:
-            with option_context("mode.nan_is_na", True):
+            with option_context("future.distinguish_nan_and_na", False):
                 return obj.convert_dtypes(
                     infer_objects=False, dtype_backend=self.dtype_backend
                 )
@@ -1075,7 +1075,7 @@ def __next__(self) -> DataFrame | Series:
             raise ex
 
         if self.dtype_backend is not lib.no_default:
-            with option_context("mode.nan_is_na", True):
+            with option_context("future.distinguish_nan_and_na", False):
                 return obj.convert_dtypes(
                     infer_objects=False, dtype_backend=self.dtype_backend
                 )
diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py
@@ -386,7 +386,7 @@ def parse_table_schema(json, precise_float: bool) -> DataFrame:
             'table="orient" can not yet read ISO-formatted Timedelta data'
         )
 
-    with option_context("mode.nan_is_na", True):
+    with option_context("future.distinguish_nan_and_na", False):
         df = df.astype(dtypes)
 
     if "primaryKey" in table["schema"]:
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -2702,8 +2702,12 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
             # recreate with tz if indicated
             converted = _set_tz(converted, tz, dtype)
 
-        elif dtype == "timedelta64":
-            converted = np.asarray(converted, dtype="m8[ns]")
+        elif dtype.startswith("timedelta64"):
+            if dtype == "timedelta64":
+                # from before we started storing timedelta64 unit
+                converted = np.asarray(converted, dtype="m8[ns]")
+            else:
+                converted = np.asarray(converted, dtype=dtype)
         elif dtype == "date":
             try:
                 converted = np.asarray(
@@ -3086,8 +3090,13 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None
                 tz = getattr(attrs, "tz", None)
                 ret = _set_tz(ret, tz, dtype)
 
-            elif dtype == "timedelta64":
-                ret = np.asarray(ret, dtype="m8[ns]")
+            elif dtype and dtype.startswith("timedelta64"):
+                if dtype == "timedelta64":
+                    # This was written back before we started writing
+                    # timedelta64 units
+                    ret = np.asarray(ret, dtype="m8[ns]")
+                else:
+                    ret = np.asarray(ret, dtype=dtype)
 
         if transposed:
             return ret.T
@@ -3324,7 +3333,7 @@ def write_array(
             node._v_attrs.value_type = f"datetime64[{value.dtype.unit}]"
         elif lib.is_np_dtype(value.dtype, "m"):
             self._handle.create_array(self.group, key, value.view("i8"))
-            getattr(self.group, key)._v_attrs.value_type = "timedelta64"
+            getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
         elif isinstance(value, BaseStringArray):
             vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom())
             vlarr.append(value.to_numpy())
@@ -5175,8 +5184,12 @@ def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray
             index = DatetimeIndex(data)
         else:
             index = DatetimeIndex(data.view(kind))
-    elif kind == "timedelta64":
-        index = TimedeltaIndex(data)
+    elif kind.startswith("timedelta64"):
+        if kind == "timedelta64":
+            # created before we stored resolution information
+            index = TimedeltaIndex(data)
+        else:
+            index = TimedeltaIndex(data.view(kind))
     elif kind == "date":
         try:
             index = np.asarray([date.fromordinal(v) for v in data], dtype=object)
@@ -5413,7 +5426,7 @@ def _dtype_to_kind(dtype_str: str) -> str:
     elif dtype_str.startswith("datetime64"):
         kind = dtype_str
     elif dtype_str.startswith("timedelta"):
-        kind = "timedelta64"
+        kind = dtype_str
     elif dtype_str.startswith("bool"):
         kind = "bool"
     elif dtype_str.startswith("category"):
diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py
@@ -1107,7 +1107,7 @@ def __init__(self, unit: TimeUnit = "ns"):
     axis: Axis
 
     @staticmethod
-    def format_timedelta_ticks(x, pos, n_decimals: int, exp: int) -> str:
+    def format_timedelta_ticks(x, pos, n_decimals: int, exp: int = 9) -> str:
         """
         Convert seconds to 'D days HH:MM:SS.F'
         """
diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py
@@ -848,7 +848,7 @@ def test_append_raise(tmp_path, using_infer_string):
             store.append("df", df)
 
 
-def test_append_with_timedelta(tmp_path):
+def test_append_with_timedelta(tmp_path, unit):
     # GH 3577
     # append timedelta
 
@@ -860,6 +860,7 @@ def test_append_with_timedelta(tmp_path):
         }
     )
     df["C"] = df["A"] - df["B"]
+    df["C"] = df["C"].astype(f"m8[{unit}]")
     df.loc[3:5, "C"] = np.nan
 
     path = tmp_path / "test_append_with_timedelta.h5"
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
@@ -1017,11 +1017,12 @@ def test_duplicate_column_name(tmp_path, setup_path):
     assert other.equals(df)
 
 
-@pytest.mark.xfail(reason="non-nano TimedeltaIndex does not round-trip")
-def test_preserve_timedeltaindex_type(setup_path):
+def test_preserve_timedeltaindex_type(setup_path, unit):
     # GH9635
     df = DataFrame(np.random.default_rng(2).normal(size=(10, 5)))
-    df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example")
+    df.index = timedelta_range(
+        start="0s", periods=10, freq="1s", name="example", unit=unit
+    )
 
     with ensure_clean_store(setup_path) as store:
         store["df"] = df

Original file line number	Diff line number	Diff line change
`@@ -386,7 +386,7 @@ def parse_table_schema(json, precise_float: bool) -> DataFrame:`
`386`	`386`	`'table="orient" can not yet read ISO-formatted Timedelta data'`
`387`	`387`	`)`
`388`	`388`
`389`		`- with option_context("mode.nan_is_na", True):`
	`389`	`+ with option_context("future.distinguish_nan_and_na", False):`
`390`	`390`	`df = df.astype(dtypes)`
`391`	`391`
`392`	`392`	`if "primaryKey" in table["schema"]:`
Original file line number	Diff line number	Diff line change
`@@ -848,7 +848,7 @@ def test_append_raise(tmp_path, using_infer_string):`
`848`	`848`	`store.append("df", df)`
`849`	`849`
`850`	`850`
`851`		`-def test_append_with_timedelta(tmp_path):`
	`851`	`+def test_append_with_timedelta(tmp_path, unit):`
`852`	`852`	`# GH 3577`
`853`	`853`	`# append timedelta`
`854`	`854`
`@@ -860,6 +860,7 @@ def test_append_with_timedelta(tmp_path):`
`860`	`860`	`}`
`861`	`861`	`)`
`862`	`862`	`df["C"] = df["A"] - df["B"]`
	`863`	`+ df["C"] = df["C"].astype(f"m8[{unit}]")`
`863`	`864`	`df.loc[3:5, "C"] = np.nan`
`864`	`865`
`865`	`866`	`path = tmp_path / "test_append_with_timedelta.h5"`