pandas-dev · mroeschke · Oct 14, 2022 · Oct 9, 2022 · Oct 9, 2022 · Oct 9, 2022
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -121,6 +121,8 @@ Other API changes
 - Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`)
 - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`)
 - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`)
+- Passing data with dtype of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; timedelta64 data with lower resolution will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`)
+- Passing ``dtype`` of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; a ``dtype`` with lower resolution passed to :class:`Series` or :class:`DataFrame` will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`)
 - Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`)
 -
 

diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py
@@ -31,13 +31,17 @@
     "periods_per_day",
     "periods_per_second",
     "is_supported_unit",
+    "npy_unit_to_abbrev",
+    "get_supported_reso",
 ]
 
 from pandas._libs.tslibs import dtypes
 from pandas._libs.tslibs.conversion import localize_pydatetime
 from pandas._libs.tslibs.dtypes import (
     Resolution,
+    get_supported_reso,
     is_supported_unit,
+    npy_unit_to_abbrev,
     periods_per_day,
     periods_per_second,
 )

diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -20,9 +20,11 @@
     Tick,
     Timedelta,
     astype_overflowsafe,
+    get_supported_reso,
     get_unit_from_dtype,
     iNaT,
     is_supported_unit,
+    npy_unit_to_abbrev,
     periods_per_second,
     to_offset,
 )
@@ -197,28 +199,29 @@ def _simple_new(  # type: ignore[override]
         return result
 
     @classmethod
-    def _from_sequence(
-        cls, data, *, dtype=TD64NS_DTYPE, copy: bool = False
-    ) -> TimedeltaArray:
+    def _from_sequence(cls, data, *, dtype=None, copy: bool = False) -> TimedeltaArray:
         if dtype:
-            _validate_td64_dtype(dtype)
+            dtype = _validate_td64_dtype(dtype)
 
         data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None)
         freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False)
 
+        if dtype is not None:
+            data = astype_overflowsafe(data, dtype=dtype, copy=False)
+
         return cls._simple_new(data, dtype=data.dtype, freq=freq)
 
     @classmethod
     def _from_sequence_not_strict(
         cls,
         data,
-        dtype=TD64NS_DTYPE,
+        dtype=None,
         copy: bool = False,
         freq=lib.no_default,
         unit=None,
     ) -> TimedeltaArray:
         if dtype:
-            _validate_td64_dtype(dtype)
+            dtype = _validate_td64_dtype(dtype)
 
         assert unit not in ["Y", "y", "M"]  # caller is responsible for checking
 
@@ -232,6 +235,9 @@ def _from_sequence_not_strict(
         if explicit_none:
             freq = None
 
+        if dtype is not None:
+            data = astype_overflowsafe(data, dtype=dtype, copy=False)
+
         result = cls._simple_new(data, dtype=data.dtype, freq=freq)
 
         if inferred_freq is None and freq is not None:
@@ -944,9 +950,13 @@ def sequence_to_td64ns(
         copy = False
 
     elif is_timedelta64_dtype(data.dtype):
-        if data.dtype != TD64NS_DTYPE:
-            # non-nano unit
-            data = astype_overflowsafe(data, dtype=TD64NS_DTYPE)
+        data_unit = get_unit_from_dtype(data.dtype)
+        if not is_supported_unit(data_unit):
+            # cast to closest supported unit, i.e. s or ns
+            new_reso = get_supported_reso(data_unit)
+            new_unit = npy_unit_to_abbrev(new_reso)
+            new_dtype = np.dtype(f"m8[{new_unit}]")
+            data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
             copy = False
 
     else:
@@ -955,7 +965,9 @@ def sequence_to_td64ns(
 
     data = np.array(data, copy=copy)
 
-    assert data.dtype == "m8[ns]", data
+    assert data.dtype.kind == "m"
+    assert data.dtype != "m8"  # i.e. not unit-less
+
     return data, inferred_freq
 
 
@@ -1045,7 +1057,11 @@ def _validate_td64_dtype(dtype) -> DtypeObj:
         )
         raise ValueError(msg)
 
-    if not is_dtype_equal(dtype, TD64NS_DTYPE):
+    if (
+        not isinstance(dtype, np.dtype)
+        or dtype.kind != "m"
+        or not is_supported_unit(get_unit_from_dtype(dtype))
+    ):
         raise ValueError(f"dtype {dtype} cannot be converted to timedelta64[ns]")
 
     return dtype
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -31,6 +31,10 @@
     Timedelta,
     Timestamp,
     astype_overflowsafe,
+    get_supported_reso,
+    get_unit_from_dtype,
+    is_supported_unit,
+    npy_unit_to_abbrev,
 )
 from pandas._libs.tslibs.timedeltas import array_to_timedelta64
 from pandas._typing import (
@@ -1456,8 +1460,11 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
     """
     Convert dtypes with unsupported coarse granularity to the nearest supported unit
 
-    >>> _ensure_nanosecond_dtype(np.dtype("M8[s]"))
-    dtype('<M8[ns]')
+    >>> _ensure_nanosecond_dtype(np.dtype("M8[D]"))
+    dtype('<M8[s]')
+
+    >>> _ensure_nanosecond_dtype(np.dtype("M8[us]"))
+    dtype('<M8[us]')
 
     >>> _ensure_nanosecond_dtype(np.dtype("m8[ps]"))
     Traceback (most recent call last):
@@ -1476,13 +1483,15 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
         # i.e. datetime64tz
         pass
 
-    elif dtype.kind == "M" and dtype != DT64NS_DTYPE:
+    elif dtype.kind == "M" and not is_supported_unit(get_unit_from_dtype(dtype)):
         # pandas supports dtype whose granularity is less than [ns]
         # e.g., [ps], [fs], [as]
         if dtype <= np.dtype("M8[ns]"):
             if dtype.name == "datetime64":
                 raise ValueError(msg)
-            dtype = DT64NS_DTYPE
+            reso = get_supported_reso(get_unit_from_dtype(dtype))
+            unit = npy_unit_to_abbrev(reso)
+            dtype = np.dtype(f"M8[{unit}]")
         else:
             raise TypeError(f"cannot convert datetimelike to dtype [{dtype}]")
 
@@ -1492,7 +1501,9 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
         if dtype <= np.dtype("m8[ns]"):
             if dtype.name == "timedelta64":
                 raise ValueError(msg)
-            dtype = TD64NS_DTYPE
+            reso = get_supported_reso(get_unit_from_dtype(dtype))
+            unit = npy_unit_to_abbrev(reso)
+            dtype = np.dtype(f"m8[{unit}]")
         else:
             raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]")
     return dtype

diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
@@ -12,7 +12,6 @@
 from pandas._typing import DtypeObj
 
 from pandas.core.dtypes.common import (
-    TD64NS_DTYPE,
     is_scalar,
     is_timedelta64_dtype,
 )
@@ -121,7 +120,7 @@ def __new__(
         unit=None,
         freq=lib.no_default,
         closed=None,
-        dtype=TD64NS_DTYPE,
+        dtype=None,
         copy: bool = False,
         name=None,
     ):

diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
@@ -152,9 +152,11 @@ def test_mul_td64arr(self, left, box_cls):
         right = np.array([1, 2, 3], dtype="m8[s]")
         right = box_cls(right)
 
-        expected = TimedeltaIndex(["10s", "40s", "90s"])
+        expected = TimedeltaIndex(["10s", "40s", "90s"], dtype=right.dtype)
+
         if isinstance(left, Series) or box_cls is Series:
             expected = Series(expected)
+        assert expected.dtype == right.dtype
 
         result = left * right
         tm.assert_equal(result, expected)
@@ -171,9 +173,10 @@ def test_div_td64arr(self, left, box_cls):
         right = np.array([10, 40, 90], dtype="m8[s]")
         right = box_cls(right)
 
-        expected = TimedeltaIndex(["1s", "2s", "3s"])
+        expected = TimedeltaIndex(["1s", "2s", "3s"], dtype=right.dtype)
         if isinstance(left, Series) or box_cls is Series:
             expected = Series(expected)
+        assert expected.dtype == right.dtype
 
         result = right / left
         tm.assert_equal(result, expected)
@@ -206,12 +209,12 @@ def test_numeric_arr_mul_tdscalar(self, scalar_td, numeric_idx, box_with_array):
         box = box_with_array
         index = numeric_idx
         expected = TimedeltaIndex([Timedelta(days=n) for n in range(len(index))])
-        if isinstance(scalar_td, np.timedelta64) and box not in [Index, Series]:
+        if isinstance(scalar_td, np.timedelta64):
             # TODO(2.0): once TDA.astype converts to m8, just do expected.astype
             tda = expected._data
             dtype = scalar_td.dtype
             expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype)
-        elif type(scalar_td) is timedelta and box not in [Index, Series]:
+        elif type(scalar_td) is timedelta:
             # TODO(2.0): once TDA.astype converts to m8, just do expected.astype
             tda = expected._data
             dtype = np.dtype("m8[us]")
@@ -247,7 +250,7 @@ def test_numeric_arr_mul_tdscalar_numexpr_path(
         obj = tm.box_expected(arr, box, transpose=False)
 
         expected = arr_i8.view("timedelta64[D]").astype("timedelta64[ns]")
-        if type(scalar_td) is timedelta and box is array:
+        if type(scalar_td) is timedelta:
             # TODO(2.0): this shouldn't depend on 'box'
             expected = expected.astype("timedelta64[us]")
             # TODO(2.0): won't be necessary to construct TimedeltaArray
@@ -268,15 +271,15 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array
         index = numeric_idx[1:3]
 
         expected = TimedeltaIndex(["3 Days", "36 Hours"])
-        if isinstance(three_days, np.timedelta64) and box not in [Index, Series]:
+        if isinstance(three_days, np.timedelta64):
             # TODO(2.0): just use expected.astype
             tda = expected._data
             dtype = three_days.dtype
             if dtype < np.dtype("m8[s]"):
                 # i.e. resolution is lower -> use lowest supported resolution
                 dtype = np.dtype("m8[s]")
             expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype)
-        elif type(three_days) is timedelta and box not in [Index, Series]:
+        elif type(three_days) is timedelta:
             # TODO(2.0): just use expected.astype
             tda = expected._data
             dtype = np.dtype("m8[us]")

diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py
@@ -271,7 +271,7 @@ def test_array_copy():
         ),
         (
             np.array([1, 2], dtype="m8[us]"),
-            TimedeltaArray(np.array([1000, 2000], dtype="m8[ns]")),
+            TimedeltaArray(np.array([1, 2], dtype="m8[us]")),
         ),
         # integer
         ([1, 2], IntegerArray._from_sequence([1, 2])),

diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
@@ -484,15 +484,21 @@ def test_astype_to_timedelta_unit(self, unit):
         dtype = f"m8[{unit}]"
         arr = np.array([[1, 2, 3]], dtype=dtype)
         df = DataFrame(arr)
-        result = df.astype(dtype)
+        if unit in ["us", "ms", "s"]:
+            assert (df.dtypes == dtype).all()
+        else:
+            # We get the nearest supported unit, i.e. "s"
+            assert (df.dtypes == "m8[s]").all()
 
+        result = df.astype(dtype)
         if unit in ["m", "h", "D"]:
-            # We don't support these, so we use the old logic to convert to float
+            # We don't support these, so we use the pre-2.0 logic to convert to float
+            #  (xref GH#48979)
+
             expected = DataFrame(df.values.astype(dtype).astype(float))
         else:
-            tda = pd.core.arrays.TimedeltaArray._simple_new(arr, dtype=arr.dtype)
-            expected = DataFrame(tda)
-            assert (expected.dtypes == dtype).all()
+            # The conversion is a no-op, so we just get a copy
+            expected = df
 
         tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
@@ -215,14 +215,15 @@ def test_construction_with_mixed(self, float_string_frame):
 
     def test_construction_with_conversions(self):
 
-        # convert from a numpy array of non-ns timedelta64
+        # convert from a numpy array of non-ns timedelta64; as of 2.0 this does
+        #  *not* convert
         arr = np.array([1, 2, 3], dtype="timedelta64[s]")
         df = DataFrame(index=range(3))
         df["A"] = arr
         expected = DataFrame(
             {"A": pd.timedelta_range("00:00:01", periods=3, freq="s")}, index=range(3)
         )
-        tm.assert_frame_equal(df, expected)
+        tm.assert_numpy_array_equal(df["A"].to_numpy(), arr)
 
         expected = DataFrame(
             {

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -2075,18 +2075,19 @@ def test_constructor_datetimes_non_ns(self, order, dtype):
 
     @pytest.mark.parametrize("order", ["K", "A", "C", "F"])
     @pytest.mark.parametrize(
-        "dtype",
+        "unit",
         [
-            "timedelta64[D]",
-            "timedelta64[h]",
-            "timedelta64[m]",
-            "timedelta64[s]",
-            "timedelta64[ms]",
-            "timedelta64[us]",
-            "timedelta64[ns]",
+            "D",
+            "h",
+            "m",
+            "s",
+            "ms",
+            "us",
+            "ns",
         ],
     )
-    def test_constructor_timedelta_non_ns(self, order, dtype):
+    def test_constructor_timedelta_non_ns(self, order, unit):
+        dtype = f"timedelta64[{unit}]"
         na = np.array(
             [
                 [np.timedelta64(1, "D"), np.timedelta64(2, "D")],
@@ -2095,13 +2096,22 @@ def test_constructor_timedelta_non_ns(self, order, dtype):
             dtype=dtype,
             order=order,
         )
-        df = DataFrame(na).astype("timedelta64[ns]")
+        df = DataFrame(na)
+        if unit in ["D", "h", "m"]:
+            # we get the nearest supported unit, i.e. "s"
+            exp_unit = "s"
+        else:
+            exp_unit = unit
+        exp_dtype = np.dtype(f"m8[{exp_unit}]")
         expected = DataFrame(
             [
                 [Timedelta(1, "D"), Timedelta(2, "D")],
                 [Timedelta(4, "D"), Timedelta(5, "D")],
             ],
+            dtype=exp_dtype,
         )
+        # TODO(2.0): ideally we should get the same 'expected' without passing
+        #  dtype=exp_dtype.
         tm.assert_frame_equal(df, expected)
 
     def test_constructor_for_list_with_dtypes(self):