pandas-dev · jreback · Apr 18, 2022 · Apr 7, 2022 · Apr 8, 2022 · Apr 8, 2022
diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd
@@ -1,6 +1,8 @@
 from cpython.datetime cimport timedelta
 from numpy cimport int64_t
 
+from .np_datetime cimport NPY_DATETIMEUNIT
+
 
 # Exposed for tslib, not intended for outside use.
 cpdef int64_t delta_to_nanoseconds(delta) except? -1
@@ -13,7 +15,9 @@ cdef class _Timedelta(timedelta):
         int64_t value      # nanoseconds
         bint _is_populated  # are my components populated
         int64_t _d, _h, _m, _s, _ms, _us, _ns
+        NPY_DATETIMEUNIT _reso
 
     cpdef timedelta to_pytimedelta(_Timedelta self)
     cdef bint _has_ns(self)
     cdef _ensure_components(_Timedelta self)
+    cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op)
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -45,13 +45,19 @@ from pandas._libs.tslibs.nattype cimport (
 )
 from pandas._libs.tslibs.np_datetime cimport (
     NPY_DATETIMEUNIT,
+    NPY_FR_ns,
+    cmp_dtstructs,
     cmp_scalar,
     get_datetime64_unit,
     get_timedelta64_value,
+    npy_datetimestruct,
+    pandas_datetime_to_datetimestruct,
+    pandas_timedelta_to_timedeltastruct,
     pandas_timedeltastruct,
-    td64_to_tdstruct,
 )
+
 from pandas._libs.tslibs.np_datetime import OutOfBoundsTimedelta
+
 from pandas._libs.tslibs.offsets cimport is_tick_object
 from pandas._libs.tslibs.util cimport (
     is_array,
@@ -176,7 +182,9 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1:
     if is_tick_object(delta):
         return delta.nanos
     if isinstance(delta, _Timedelta):
-        return delta.value
+        if delta._reso == NPY_FR_ns:
+            return delta.value
+        raise NotImplementedError(delta._reso)
 
     if is_timedelta64_object(delta):
         return get_timedelta64_value(ensure_td64ns(delta))
@@ -251,6 +259,8 @@ cdef convert_to_timedelta64(object ts, str unit):
         return np.timedelta64(NPY_NAT, "ns")
     elif isinstance(ts, _Timedelta):
         # already in the proper format
+        if ts._reso != NPY_FR_ns:
+            raise NotImplementedError
         ts = np.timedelta64(ts.value, "ns")
     elif is_timedelta64_object(ts):
         ts = ensure_td64ns(ts)
@@ -643,7 +653,8 @@ cdef bint _validate_ops_compat(other):
 
 def _op_unary_method(func, name):
     def f(self):
-        return Timedelta(func(self.value), unit='ns')
+        new_value = func(self.value)  # func acts on the stored integer value
+        return _timedelta_from_value_and_reso(new_value, self._reso)  # same reso
     f.__name__ = name
     return f
 
@@ -688,7 +699,17 @@ def _binary_op_method_timedeltalike(op, name):
         if other is NaT:
             # e.g. if original other was timedelta64('NaT')
             return NaT
-        return Timedelta(op(self.value, other.value), unit='ns')
+
+        if self._reso != other._reso:
+            raise NotImplementedError
+
+        res = op(self.value, other.value)
+        if res == NPY_NAT:
+            # e.g. test_implementation_limits
+            # TODO: more generally could do an overflowcheck in op?
+            return NaT
+
+        return _timedelta_from_value_and_reso(res, reso=self._reso)
 
     f.__name__ = name
     return f
@@ -818,6 +839,38 @@ cdef _to_py_int_float(v):
     raise TypeError(f"Invalid type {type(v)}. Must be int or float.")
 
 
+def _timedelta_unpickle(value, reso):  # reconstructor named in Timedelta.__reduce__
+    return _timedelta_from_value_and_reso(value, reso)
+
+
+cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
+    # Could make this a classmethod if/when cython supports cdef classmethods
+    cdef:
+        _Timedelta td_base
+
+    if reso == NPY_FR_ns:  # base class only gets whole microseconds (floor-divided)
+        td_base = _Timedelta.__new__(Timedelta, microseconds=int(value) // 1000)
+    elif reso == NPY_DATETIMEUNIT.NPY_FR_us:
+        td_base = _Timedelta.__new__(Timedelta, microseconds=int(value))
+    elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
+        td_base = _Timedelta.__new__(Timedelta, milliseconds=int(value))
+    elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
+        td_base = _Timedelta.__new__(Timedelta, seconds=int(value))
+    elif reso == NPY_DATETIMEUNIT.NPY_FR_m:
+        td_base = _Timedelta.__new__(Timedelta, minutes=int(value))
+    elif reso == NPY_DATETIMEUNIT.NPY_FR_h:
+        td_base = _Timedelta.__new__(Timedelta, hours=int(value))
+    elif reso == NPY_DATETIMEUNIT.NPY_FR_D:
+        td_base = _Timedelta.__new__(Timedelta, days=int(value))
+    else:
+        raise NotImplementedError(reso)
+
+    td_base.value = value  # exact integer count, in units of reso
+    td_base._is_populated = 0  # component fields are not computed yet
+    td_base._reso = reso
+    return td_base
+
+
 # Similar to Timestamp/datetime, this is a construction requirement for
 # timedeltas that we need to do object instantiation in python. This will
 # serve as a C extension type that shadows the Python class, where we do any
@@ -827,6 +880,7 @@ cdef class _Timedelta(timedelta):
     #    int64_t value      # nanoseconds
     #    bint _is_populated  # are my components populated
     #    int64_t _d, _h, _m, _s, _ms, _us, _ns
+    #    NPY_DATETIMEUNIT _reso
 
     # higher than np.ndarray and np.matrix
     __array_priority__ = 100
@@ -853,7 +907,12 @@ cdef class _Timedelta(timedelta):
 
     def __hash__(_Timedelta self):
         if self._has_ns():
-            return hash(self.value)
+            # Note: this does *not* satisfy the invariant
+            #  td1 == td2 \\Rightarrow hash(td1) == hash(td2)
+            #  if td1 and td2 have different _resos. timedelta64 also has this
+            #  hash/equality mismatch.
+            #  see GH#44504
+            return self.value  # stored integer as-is, not converted to a common unit
         else:
             return timedelta.__hash__(self)
 
@@ -890,10 +949,30 @@ cdef class _Timedelta(timedelta):
         else:
             return NotImplemented
 
-        return cmp_scalar(self.value, ots.value, op)
+        if self._reso == ots._reso:
+            return cmp_scalar(self.value, ots.value, op)
+        return self._compare_mismatched_resos(ots, op)
+
+    # TODO: re-use/share with Timestamp
+    cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op):
+        # Can't just dispatch to numpy: it silently overflows and gets it wrong
+        cdef:
+            npy_datetimestruct dts_self
+            npy_datetimestruct dts_other
+
+        # expand each (value, reso) with the datetime struct utils, then compare those
+        pandas_datetime_to_datetimestruct(self.value, self._reso, &dts_self)
+        pandas_datetime_to_datetimestruct(other.value, other._reso, &dts_other)
+        return cmp_dtstructs(&dts_self,  &dts_other, op)
 
     cdef bint _has_ns(self):
-        return self.value % 1000 != 0
+        if self._reso == NPY_FR_ns:
+            return self.value % 1000 != 0  # any remainder below one microsecond
+        elif self._reso < NPY_FR_ns:
+            # coarser than nanosecond (e.g. s, ms, us): no sub-microsecond part
+            return False
+        else:
+            raise NotImplementedError(self._reso)
 
     cdef _ensure_components(_Timedelta self):
         """
@@ -905,7 +984,7 @@ cdef class _Timedelta(timedelta):
         cdef:
             pandas_timedeltastruct tds
 
-        td64_to_tdstruct(self.value, &tds)
+        pandas_timedelta_to_timedeltastruct(self.value, self._reso, &tds)
         self._d = tds.days
         self._h = tds.hrs
         self._m = tds.min
@@ -937,13 +1016,24 @@ cdef class _Timedelta(timedelta):
         -----
         Any nanosecond resolution will be lost.
         """
-        return timedelta(microseconds=int(self.value) / 1000)
+        if self._reso == NPY_FR_ns:
+            return timedelta(microseconds=int(self.value) / 1000)
+
+        # TODO(@WillAyd): is this the right way to use components?
+        self._ensure_components()
+        return timedelta(
+            days=self._d, seconds=self._seconds, microseconds=self._microseconds
+        )
 
     def to_timedelta64(self) -> np.timedelta64:
         """
-        Return a numpy.timedelta64 object with 'ns' precision.
+        Return a numpy.timedelta64 object with this object's resolution.
         """
-        return np.timedelta64(self.value, 'ns')
+        cdef:
+            str abbrev = npy_unit_to_abbrev(self._reso)
+        # TODO: way to create a np.timedelta64 obj with the reso directly
+        #  instead of having to get the abbrev?
+        return np.timedelta64(self.value, abbrev)
 
     def to_numpy(self, dtype=None, copy=False) -> np.timedelta64:
         """
@@ -1054,7 +1144,7 @@ cdef class _Timedelta(timedelta):
         >>> td.asm8
         numpy.timedelta64(42,'ns')
         """
-        return np.int64(self.value).view('m8[ns]')
+        return self.to_timedelta64()
 
     @property
     def resolution_string(self) -> str:
@@ -1258,6 +1348,14 @@ cdef class _Timedelta(timedelta):
                f'H{components.minutes}M{seconds}S')
         return tpl
 
+    # ----------------------------------------------------------------
+    # Constructors
+
+    @classmethod
+    def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso):
+        # exposed as a classmethod for testing; cls is not passed to the helper
+        return _timedelta_from_value_and_reso(value, reso)
+
 
 # Python front end to C extension type _Timedelta
 # This serves as the box for timedelta64
@@ -1413,19 +1511,21 @@ class Timedelta(_Timedelta):
         if value == NPY_NAT:
             return NaT
 
-        # make timedelta happy
-        td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000)
-        td_base.value = value
-        td_base._is_populated = 0
-        return td_base
+        return _timedelta_from_value_and_reso(value, NPY_FR_ns)
 
     def __setstate__(self, state):
-        (value) = state
+        if len(state) == 1:
+            # pickle written before _reso existed: (value,), implicitly nanosecond
+            value = state[0]
+            reso = NPY_FR_ns
+        else:
+            value, reso = state
         self.value = value
+        self._reso = reso
 
     def __reduce__(self):
-        object_state = self.value,
-        return (Timedelta, object_state)
+        object_state = self.value, self._reso  # unit travels with the integer
+        return (_timedelta_unpickle, object_state)
 
     @cython.cdivision(True)
     def _round(self, freq, mode):
@@ -1496,7 +1596,14 @@ class Timedelta(_Timedelta):
 
     def __mul__(self, other):
         if is_integer_object(other) or is_float_object(other):
-            return Timedelta(other * self.value, unit='ns')
+            if util.is_nan(other):
+                # np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT
+                return NaT
+
+            return _timedelta_from_value_and_reso(
+                <int64_t>(other * self.value),
+                reso=self._reso,
+            )
 
         elif is_array(other):
             # ndarray-like

diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py
@@ -24,6 +24,79 @@
 import pandas._testing as tm
 
 
+class TestNonNano:
+    @pytest.fixture(params=[7, 8, 9])
+    def unit(self, request):
+        # NPY_DATETIMEUNIT codes 7, 8, 9: second, millisecond, microsecond, respectively
+        return request.param
+
+    @pytest.fixture
+    def val(self, unit):
+        # microsecond count that is just out of bounds once expressed in nanoseconds
+        us = 9223372800000000
+        if unit == 9:
+            value = us
+        elif unit == 8:
+            value = us // 1000
+        else:
+            value = us // 1_000_000
+        return value
+
+    @pytest.fixture
+    def td(self, unit, val):
+        return Timedelta._from_value_and_reso(val, unit)
+
+    def test_from_value_and_reso(self, unit, val):
+        # Just checking that the fixture is giving us what we asked for
+        td = Timedelta._from_value_and_reso(val, unit)
+        assert td.value == val
+        assert td._reso == unit
+        assert td.days == 106752  # val is a whole number of days in each unit
+
+    def test_unary_non_nano(self, td, unit):
+        assert abs(td)._reso == unit
+        assert (-td)._reso == unit
+        assert (+td)._reso == unit
+
+    def test_sub_preserves_reso(self, td, unit):
+        res = td - td
+        expected = Timedelta._from_value_and_reso(0, unit)
+        assert res == expected
+        assert res._reso == unit
+
+    def test_mul_preserves_reso(self, td, unit):
+        # The td fixture should always be far from the implementation
+        #  bound, so doubling does not risk overflow.
+        res = td * 2
+        assert res.value == td.value * 2
+        assert res._reso == unit
+
+    def test_cmp_cross_reso(self, td):
+        other = Timedelta(days=106751, unit="ns")  # nanosecond reso, one day less
+        assert other < td
+        assert td > other
+        assert not other == td
+        assert td != other
+
+    def test_to_pytimedelta(self, td):
+        res = td.to_pytimedelta()
+        expected = timedelta(days=106752)
+        assert type(res) is timedelta
+        assert res == expected
+
+    def test_to_timedelta64(self, td, unit):
+        for res in [td.to_timedelta64(), td.to_numpy(), td.asm8]:
+
+            assert isinstance(res, np.timedelta64)
+            assert res.view("i8") == td.value
+            if unit == 7:
+                assert res.dtype == "m8[s]"
+            elif unit == 8:
+                assert res.dtype == "m8[ms]"
+            elif unit == 9:
+                assert res.dtype == "m8[us]"
+
+
 class TestTimedeltaUnaryOps:
     def test_invert(self):
         td = Timedelta(10, unit="d")

diff --git a/setup.py b/setup.py
@@ -538,6 +538,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
     "_libs.tslibs.timedeltas": {
         "pyxfile": "_libs/tslibs/timedeltas",
         "depends": tseries_depends,
+        "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
     },
     "_libs.tslibs.timestamps": {
         "pyxfile": "_libs/tslibs/timestamps",