Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ from pandas._libs.tslibs.nattype cimport (
)
from pandas._libs.tslibs.offsets cimport is_offset_object
from pandas._libs.tslibs.period cimport is_period_object
from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
from pandas._libs.tslibs.timezones cimport tz_compare

# constants that will be compared to potentially arbitrarily large
Expand Down Expand Up @@ -2670,11 +2669,6 @@ def maybe_convert_objects(ndarray[object] objects,
elif is_timedelta(val):
if convert_non_numeric:
seen.timedelta_ = True
try:
convert_to_timedelta64(val, "ns")
except OutOfBoundsTimedelta:
seen.object_ = True
break
break
else:
seen.object_ = True
Expand Down
3 changes: 0 additions & 3 deletions pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1

cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT out_reso=*) except? -1
cdef (int64_t, int) precision_from_unit(
NPY_DATETIMEUNIT in_reso, NPY_DATETIMEUNIT out_reso=*
)

cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)

Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ cpdef int64_t get_unit_for_round(freq, NPY_DATETIMEUNIT creso) except? -1
cpdef int64_t delta_to_nanoseconds(
delta, NPY_DATETIMEUNIT reso=*, bint round_ok=*
) except? -1
cdef convert_to_timedelta64(object ts, str unit)
cdef bint is_any_td_scalar(object obj)


Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/timedeltas.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def array_to_timedelta64(
values: npt.NDArray[np.object_],
unit: str | None = ...,
errors: str = ...,
creso: int = ...,
) -> np.ndarray: ... # np.ndarray[m8ns]
def parse_timedelta_unit(unit: str | None) -> UnitChoices: ...
def delta_to_nanoseconds(
Expand Down
187 changes: 114 additions & 73 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ from pandas._libs.missing cimport checknull_with_nat_and_na
from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.conversion cimport (
cast_from_unit,
precision_from_unit,
)
from pandas._libs.tslibs.dtypes cimport (
c_DEPR_UNITS,
Expand Down Expand Up @@ -289,68 +288,6 @@ cpdef int64_t delta_to_nanoseconds(
) from err


@cython.overflowcheck(True)
cdef object ensure_td64ns(object ts):
    """
    Overflow-checked counterpart of ``td64.astype("m8[ns]")``.

    Parameters
    ----------
    ts : np.timedelta64

    Returns
    -------
    np.timedelta64
        A nanosecond-unit scalar built from the rescaled integer value.
        When the unit of ``ts`` is already nanoseconds, or is the generic
        unit, ``ts`` itself is returned unchanged.

    Raises
    ------
    OutOfBoundsTimedelta
        If multiplying the stored integer by the unit multiplier raises
        OverflowError (overflow checking is enabled by the decorator).
    """
    cdef:
        NPY_DATETIMEUNIT reso
        int64_t raw, factor

    reso = get_datetime64_unit(ts)
    if (
        reso == NPY_DATETIMEUNIT.NPY_FR_ns
        or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
    ):
        # Nothing to rescale; hand the input back as-is.
        return ts

    raw = cnp.get_timedelta64_value(ts)
    factor = precision_from_unit(reso)[0]

    try:
        # NB: cython#1381 this cannot be *=
        raw = raw * factor
    except OverflowError as err:
        raise OutOfBoundsTimedelta(ts) from err

    return np.timedelta64(raw, "ns")


cdef convert_to_timedelta64(object ts, str unit):
    """
    Convert a timedelta-like scalar to a timedelta64[ns] object.

    Accepted inputs:
        - Timedelta (``_Timedelta``)
        - datetime.timedelta
        - np.timedelta64

    Anything else raises TypeError.

    Notes
    -----
    Caller is responsible for checking unit not in ["Y", "y", "M"].
    ``unit`` is accepted for signature compatibility but is not read
    anywhere in this body.
    """
    if isinstance(ts, _Timedelta):
        if ts._creso == NPY_FR_ns:
            # Already nanosecond resolution: wrap the raw integer directly.
            ts = np.timedelta64(ts._value, "ns")
        else:
            ts = ts.as_unit("ns").asm8

    elif PyDelta_Check(ts):
        # i.e. a datetime.timedelta
        ts = np.timedelta64(delta_to_nanoseconds(ts), "ns")

    elif not cnp.is_timedelta64_object(ts):
        raise TypeError(f"Invalid type for timedelta scalar: {type(ts)}")

    # At this point ts is a np.timedelta64; cast it to nanoseconds.
    return ts.astype("timedelta64[ns]")


cdef _numeric_to_td64ns(object item, str unit):
# caller is responsible for checking
# assert unit not in ["Y", "y", "M"]
Expand All @@ -369,10 +306,34 @@ cdef _numeric_to_td64ns(object item, str unit):
return ts


# TODO: de-duplicate with DatetimeParseState
cdef class ResoState:
    """
    Track the resolution seen so far while iterating over items.

    ``creso`` holds the current resolution; ``creso_ever_changed`` records
    whether a later item ever forced it to a higher value after it had
    already been set to something other than the generic unit.
    """
    cdef:
        NPY_DATETIMEUNIT creso
        bint creso_ever_changed

    def __cinit__(self, NPY_DATETIMEUNIT creso):
        self.creso_ever_changed = False
        self.creso = creso

    cdef bint update_creso(self, NPY_DATETIMEUNIT item_reso) noexcept:
        # Return a bool indicating whether we bumped to a higher resolution
        if self.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
            # First concrete resolution seen: adopt it; this is not a bump.
            self.creso = item_reso
            return False

        if not (item_reso > self.creso):
            # Same or lower resolution than what we already track.
            return False

        self.creso = item_reso
        self.creso_ever_changed = True
        return True


@cython.boundscheck(False)
@cython.wraparound(False)
def array_to_timedelta64(
ndarray values, str unit=None, str errors="raise"
ndarray values,
str unit=None,
str errors="raise",
NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_GENERIC,
) -> ndarray:
# values is object-dtype, may be 2D
"""
Expand All @@ -394,6 +355,10 @@ def array_to_timedelta64(
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values)
cnp.flatiter it
str parsed_unit = parse_timedelta_unit(unit or "ns")
NPY_DATETIMEUNIT item_reso
ResoState state = ResoState(creso)
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
ndarray iresult = result.view("i8")

if values.descr.type_num != cnp.NPY_OBJECT:
# raise here otherwise we segfault below
Expand Down Expand Up @@ -421,18 +386,58 @@ def array_to_timedelta64(
ival = NPY_NAT

elif cnp.is_timedelta64_object(item):
td64ns_obj = ensure_td64ns(item)
ival = cnp.get_timedelta64_value(td64ns_obj)
# TODO: de-duplicate this with Timedelta.__new__
ival = cnp.get_timedelta64_value(item)
dt64_reso = get_datetime64_unit(item)
if not (
is_supported_unit(dt64_reso) or
dt64_reso in [
NPY_DATETIMEUNIT.NPY_FR_m,
NPY_DATETIMEUNIT.NPY_FR_h,
NPY_DATETIMEUNIT.NPY_FR_D,
NPY_DATETIMEUNIT.NPY_FR_W,
NPY_DATETIMEUNIT.NPY_FR_GENERIC
]
):
err = npy_unit_to_abbrev(dt64_reso)
raise ValueError(
f"Unit {err} is not supported. "
"Only unambiguous timedelta values durations are supported. "
"Allowed units are 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns'")

item_reso = get_supported_reso(dt64_reso)
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
if dt64_reso != NPY_DATETIMEUNIT.NPY_FR_GENERIC:
try:
ival = convert_reso(
ival,
dt64_reso,
creso,
round_ok=True,
)
except (OverflowError, OutOfBoundsDatetime) as err:
raise OutOfBoundsTimedelta(item) from err
else:
# e.g. NaT
pass

elif isinstance(item, _Timedelta):
if item._creso != NPY_FR_ns:
ival = item.as_unit("ns")._value
else:
ival = item._value
item_reso = item._creso
state.update_creso(item_reso)
if infer_reso:
creso = state.creso

ival = (<_Timedelta>item)._as_creso(creso)._value

elif PyDelta_Check(item):
# i.e. isinstance(item, timedelta)
ival = delta_to_nanoseconds(item)
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
ival = delta_to_nanoseconds(item, reso=creso)

elif isinstance(item, str):
if (
Expand All @@ -443,13 +448,27 @@ def array_to_timedelta64(
else:
ival = parse_timedelta_string(item)

item_reso = NPY_FR_ns
state.update_creso(item_reso)
if infer_reso:
creso = state.creso

elif is_tick_object(item):
ival = item.nanos
item_reso = get_supported_reso(item._creso)
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
ival = delta_to_nanoseconds(item, reso=creso)

elif is_integer_object(item) or is_float_object(item):
td64ns_obj = _numeric_to_td64ns(item, parsed_unit)
ival = cnp.get_timedelta64_value(td64ns_obj)

item_reso = NPY_FR_ns
state.update_creso(item_reso)
if infer_reso:
creso = state.creso

else:
raise TypeError(f"Invalid type for timedelta scalar: {type(item)}")

Expand All @@ -467,7 +486,29 @@ def array_to_timedelta64(

cnp.PyArray_MultiIter_NEXT(mi)

return result
if infer_reso:
if state.creso_ever_changed:
# We encountered mismatched resolutions, need to re-parse with
# the correct one.
return array_to_timedelta64(
values,
unit=unit,
errors=errors,
creso=state.creso,
)
elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# i.e. we never encountered anything non-NaT, default to "s". This
# ensures that insert and concat-like operations with NaT
# do not upcast units
result = iresult.view("m8[s]")
else:
# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(state.creso)
result = iresult.view(f"m8[{abbrev}]")

abbrev = npy_unit_to_abbrev(creso)
return result.view(f"m8[{abbrev}]")


@cython.cpow(True)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1251,7 +1251,7 @@ def _objects_to_td64ns(
values = np.asarray(data, dtype=np.object_)

result = array_to_timedelta64(values, unit=unit, errors=errors)
return result.view("timedelta64[ns]")
return result


def _validate_td64_dtype(dtype) -> DtypeObj:
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,7 @@ def test_tdi_add_overflow(self):
)

# These should not overflow!
exp = TimedeltaIndex([NaT])
exp = TimedeltaIndex([NaT], dtype="m8[ns]")
result = pd.to_timedelta([NaT]) - Timedelta("1 days")
tm.assert_index_equal(result, exp)

Expand Down Expand Up @@ -2216,7 +2216,7 @@ def test_float_series_rdiv_td64arr(self, box_with_array, names):

def test_td64arr_all_nat_div_object_dtype_numeric(self, box_with_array):
# GH#39750 make sure we infer the result as td64
tdi = TimedeltaIndex([NaT, NaT])
tdi = TimedeltaIndex([NaT, NaT], dtype="m8[ns]")

left = tm.box_expected(tdi, box_with_array)
right = np.array([2, 2.0], dtype=object)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@ def test_maybe_convert_objects_datetime(self):
tm.assert_numpy_array_equal(out, exp)

arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object)
exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]")
exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[s]")
out = lib.maybe_convert_objects(arr, convert_non_numeric=True)
tm.assert_numpy_array_equal(out, exp)

Expand Down Expand Up @@ -863,7 +863,7 @@ def test_maybe_convert_objects_datetime_overflow_safe(self, dtype):
if dtype == "datetime64[ns]":
expected = np.array(["2363-10-04"], dtype="M8[us]")
else:
expected = arr
expected = arr.astype("m8[us]")
tm.assert_numpy_array_equal(out, expected)

def test_maybe_convert_objects_mixed_datetimes(self):
Expand Down
8 changes: 0 additions & 8 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,14 +591,6 @@ def test_reduce_frame(self, data, all_numeric_reductions, skipna, request):
if data.dtype._is_numeric:
mark = pytest.mark.xfail(reason="skew not implemented")
request.applymarker(mark)
elif (
op_name in ["std", "sem"]
and pa.types.is_date64(data._pa_array.type)
and skipna
):
# overflow
mark = pytest.mark.xfail(reason="Cannot cast")
request.applymarker(mark)
return super().test_reduce_frame(data, all_numeric_reductions, skipna)

@pytest.mark.parametrize("typ", ["int64", "uint64", "float64"])
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/methods/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def test_dtypes_timedeltas(self):
)
result = df.dtypes
expected = Series(
[np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")], index=list("AB")
[np.dtype("datetime64[ns]"), np.dtype("timedelta64[us]")], index=list("AB")
)
tm.assert_series_equal(result, expected)

Expand All @@ -112,7 +112,7 @@ def test_dtypes_timedeltas(self):
expected = Series(
[
np.dtype("datetime64[ns]"),
np.dtype("timedelta64[ns]"),
np.dtype("timedelta64[us]"),
np.dtype("datetime64[ns]"),
],
index=list("ABC"),
Expand All @@ -125,7 +125,7 @@ def test_dtypes_timedeltas(self):
expected = Series(
[
np.dtype("datetime64[ns]"),
np.dtype("timedelta64[ns]"),
np.dtype("timedelta64[us]"),
np.dtype("datetime64[ns]"),
np.dtype("int64"),
],
Expand Down
Loading
Loading