Skip to content

API: preserve reso in Timedelta(td64_obj) #48910

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Oct 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 42 additions & 51 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ from pandas._libs.tslibs.conversion cimport (
cast_from_unit,
precision_from_unit,
)
from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev
from pandas._libs.tslibs.dtypes cimport (
get_supported_reso,
npy_unit_to_abbrev,
)
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_NaT as NaT,
Expand Down Expand Up @@ -939,6 +942,7 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso):
cdef:
_Timedelta td_base

assert value != NPY_NAT
# For millisecond and second resos, we cannot actually pass int(value) because
# many cases would fall outside of the pytimedelta implementation bounds.
# We pass 0 instead, and override seconds, microseconds, days.
Expand Down Expand Up @@ -1704,10 +1708,27 @@ class Timedelta(_Timedelta):
elif PyDelta_Check(value):
value = convert_to_timedelta64(value, 'ns')
elif is_timedelta64_object(value):
if get_timedelta64_value(value) == NPY_NAT:
# Retain the resolution if possible, otherwise cast to the nearest
# supported resolution.
new_value = get_timedelta64_value(value)
if new_value == NPY_NAT:
# i.e. np.timedelta64("NaT")
return NaT
value = ensure_td64ns(value)

reso = get_datetime64_unit(value)
new_reso = get_supported_reso(reso)
if reso != NPY_DATETIMEUNIT.NPY_FR_GENERIC:
try:
new_value = convert_reso(
get_timedelta64_value(value),
reso,
new_reso,
round_ok=True,
)
except (OverflowError, OutOfBoundsDatetime) as err:
raise OutOfBoundsTimedelta(value) from err
return cls._from_value_and_reso(new_value, reso=new_reso)

elif is_tick_object(value):
value = np.timedelta64(value.nanos, 'ns')
elif is_integer_object(value) or is_float_object(value):
Expand Down Expand Up @@ -1917,9 +1938,15 @@ class Timedelta(_Timedelta):

if other.dtype.kind == 'm':
# also timedelta-like
if self._reso != NPY_FR_ns:
raise NotImplementedError
return _broadcast_floordiv_td64(self.value, other, _floordiv)
# TODO: could suppress
# RuntimeWarning: invalid value encountered in floor_divide
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be great (especially if it comes from a pandas routine) so we can turn on -W:::pandas in the future

result = self.asm8 // other
mask = other.view("i8") == NPY_NAT
if mask.any():
# We differ from numpy here
result = result.astype("f8")
result[mask] = np.nan
return result

elif other.dtype.kind in ['i', 'u', 'f']:
if other.ndim == 0:
Expand Down Expand Up @@ -1951,9 +1978,15 @@ class Timedelta(_Timedelta):

if other.dtype.kind == 'm':
# also timedelta-like
if self._reso != NPY_FR_ns:
raise NotImplementedError
return _broadcast_floordiv_td64(self.value, other, _rfloordiv)
# TODO: could suppress
# RuntimeWarning: invalid value encountered in floor_divide
result = other // self.asm8
mask = other.view("i8") == NPY_NAT
if mask.any():
# We differ from numpy here
result = result.astype("f8")
result[mask] = np.nan
return result

# Includes integer array // Timedelta, disallowed in GH#19761
raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__')
Expand Down Expand Up @@ -2003,45 +2036,3 @@ cdef bint _should_cast_to_timedelta(object obj):
return (
is_any_td_scalar(obj) or obj is None or obj is NaT or isinstance(obj, str)
)


# Forward floor division: `value` is the left operand, `right` the right one.
cdef _floordiv(int64_t value, right):
return value // right


# Reflected floor division: `right` is the left operand, `value` the right one.
cdef _rfloordiv(int64_t value, right):
# analogous to referencing operator.div, but there is no operator.rfloordiv,
# so the reflected form is spelled out by hand here
return right // value


cdef _broadcast_floordiv_td64(
int64_t value,
ndarray other,
object (*operation)(int64_t value, object right)
):
"""
Boilerplate code shared by Timedelta.__floordiv__ and
Timedelta.__rfloordiv__ because np.timedelta64 does not implement these.

Parameters
----------
value : int64_t; `self.value` from a Timedelta object
other : ndarray with a timedelta64 dtype; it is cast to 'm8[ns]' and then
    to 'i8' here before being handed to `operation`
operation : function, either _floordiv or _rfloordiv

Returns
-------
result : whatever `operation` returns for the integer operands; if any
    entry of `other` is NaT, the result is cast to float64 and those
    entries are set to np.nan
"""
# assumes other.dtype.kind == 'm', i.e. other is timedelta-like
# assumes other.ndim != 0

# We need to watch out for np.timedelta64('NaT').
# The mask is taken from the raw i8 view of `other`, before any unit cast.
mask = other.view('i8') == NPY_NAT

# Plain integer arithmetic on the nanosecond values.
res = operation(value, other.astype('m8[ns]', copy=False).astype('i8'))

if mask.any():
# An integer result cannot hold NaN, so switch to float64 first.
res = res.astype('f8')
res[mask] = np.nan
return res
10 changes: 8 additions & 2 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import numpy as np

from pandas._libs import lib
from pandas._libs.tslibs import (
get_unit_from_dtype,
is_supported_unit,
)
from pandas._typing import (
AxisInt,
Dtype,
Expand Down Expand Up @@ -439,10 +443,12 @@ def _cmp_method(self, other, op):
def _wrap_ndarray_result(self, result: np.ndarray):
# If we have timedelta64[ns] result, return a TimedeltaArray instead
# of a PandasArray
if result.dtype == "timedelta64[ns]":
if result.dtype.kind == "m" and is_supported_unit(
get_unit_from_dtype(result.dtype)
):
from pandas.core.arrays import TimedeltaArray

return TimedeltaArray._simple_new(result)
return TimedeltaArray._simple_new(result, dtype=result.dtype)
return type(self)(result)

# ------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,10 @@ def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64:
if not isinstance(value, self._scalar_type) and value is not NaT:
raise ValueError("'value' should be a Timedelta.")
self._check_compatible_with(value, setitem=setitem)
if value is NaT:
return np.timedelta64(value.value, "ns")
else:
return value._as_unit(self._unit).asm8
return np.timedelta64(value.value, "ns")

def _scalar_from_string(self, value) -> Timedelta | NaTType:
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ def _calculate_deltas(
_times = np.asarray(
times.view(np.int64), dtype=np.float64 # type: ignore[union-attr]
)
_halflife = float(Timedelta(halflife).value)
# TODO: generalize to non-nano?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this currently assumes np.diff(_times) and _halflife are both in ns, so I think as long as they are both brought to the higher of the two resolutions it should be fine.

_halflife = float(Timedelta(halflife)._as_unit("ns").value)
return np.diff(_times) / _halflife


Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,11 @@ def test_numeric_arr_mul_tdscalar(self, scalar_td, numeric_idx, box_with_array):
box = box_with_array
index = numeric_idx
expected = TimedeltaIndex([Timedelta(days=n) for n in range(len(index))])
if isinstance(scalar_td, np.timedelta64) and box not in [Index, Series]:
# TODO(2.0): once TDA.astype converts to m8, just do expected.astype
tda = expected._data
dtype = scalar_td.dtype
expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype)

index = tm.box_expected(index, box)
expected = tm.box_expected(expected, box)
Expand Down Expand Up @@ -249,6 +254,14 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array
index = numeric_idx[1:3]

expected = TimedeltaIndex(["3 Days", "36 Hours"])
if isinstance(three_days, np.timedelta64) and box not in [Index, Series]:
# TODO(2.0): just use expected.astype
tda = expected._data
dtype = three_days.dtype
if dtype < np.dtype("m8[s]"):
# i.e. resolution is lower -> use lowest supported resolution
dtype = np.dtype("m8[s]")
expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype)

index = tm.box_expected(index, box)
expected = tm.box_expected(expected, box)
Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/dtypes/cast/test_promote.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,14 +463,23 @@ def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype_r
[pd.Timedelta(days=1), np.timedelta64(24, "h"), datetime.timedelta(1)],
ids=["pd.Timedelta", "np.timedelta64", "datetime.timedelta"],
)
def test_maybe_promote_any_with_timedelta64(any_numpy_dtype_reduced, fill_value):
def test_maybe_promote_any_with_timedelta64(
any_numpy_dtype_reduced, fill_value, request
):
dtype = np.dtype(any_numpy_dtype_reduced)

# filling anything but timedelta with timedelta casts to object
if is_timedelta64_dtype(dtype):
expected_dtype = dtype
# for timedelta dtypes, scalar values get cast to pd.Timedelta.value
exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64()

if isinstance(fill_value, np.timedelta64) and fill_value.dtype != "m8[ns]":
mark = pytest.mark.xfail(
reason="maybe_promote not yet updated to handle non-nano "
"Timedelta scalar"
)
request.node.add_marker(mark)
else:
expected_dtype = np.dtype(object)
exp_val_for_scalar = fill_value
Expand Down
59 changes: 45 additions & 14 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -856,16 +856,31 @@ def create_data(constructor):
tm.assert_frame_equal(result_datetime, expected)
tm.assert_frame_equal(result_Timestamp, expected)

def test_constructor_dict_timedelta64_index(self):
@pytest.mark.parametrize(
"klass",
[
pytest.param(
np.timedelta64,
marks=pytest.mark.xfail(
reason="hash mismatch (GH#44504) causes lib.fast_multiget "
"to mess up on dict lookups with equal Timedeltas with "
"mismatched resos"
),
),
timedelta,
Timedelta,
],
)
def test_constructor_dict_timedelta64_index(self, klass):
# GH 10160
td_as_int = [1, 2, 3, 4]

def create_data(constructor):
return {i: {constructor(s): 2 * i} for i, s in enumerate(td_as_int)}
if klass is timedelta:
constructor = lambda x: timedelta(days=x)
else:
constructor = lambda x: klass(x, "D")

data_timedelta64 = create_data(lambda x: np.timedelta64(x, "D"))
data_timedelta = create_data(lambda x: timedelta(days=x))
data_Timedelta = create_data(lambda x: Timedelta(x, "D"))
data = {i: {constructor(s): 2 * i} for i, s in enumerate(td_as_int)}

expected = DataFrame(
[
Expand All @@ -877,12 +892,8 @@ def create_data(constructor):
index=[Timedelta(td, "D") for td in td_as_int],
)

result_timedelta64 = DataFrame(data_timedelta64)
result_timedelta = DataFrame(data_timedelta)
result_Timedelta = DataFrame(data_Timedelta)
tm.assert_frame_equal(result_timedelta64, expected)
tm.assert_frame_equal(result_timedelta, expected)
tm.assert_frame_equal(result_Timedelta, expected)
result = DataFrame(data)
tm.assert_frame_equal(result, expected)

def test_constructor_period_dict(self):
# PeriodIndex
Expand Down Expand Up @@ -3111,14 +3122,34 @@ def test_from_out_of_bounds_datetime(self, constructor, cls):

assert type(get1(result)) is cls

@pytest.mark.xfail(
reason="TimedeltaArray constructor has been updated to cast td64 to non-nano, "
"but TimedeltaArray._from_sequence has not"
)
@pytest.mark.parametrize("cls", [timedelta, np.timedelta64])
def test_from_out_of_bounds_timedelta(self, constructor, cls):
def test_from_out_of_bounds_ns_timedelta(self, constructor, cls):
# scalar that won't fit in nanosecond td64, but will fit in microsecond
scalar = datetime(9999, 1, 1) - datetime(1970, 1, 1)
exp_dtype = "m8[us]" # smallest reso that fits
if cls is np.timedelta64:
scalar = np.timedelta64(scalar, "D")
exp_dtype = "m8[s]" # closest reso to input
result = constructor(scalar)

assert type(get1(result)) is cls
item = get1(result)
dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0]

assert type(item) is Timedelta
assert item.asm8.dtype == exp_dtype
assert dtype == exp_dtype

def test_out_of_s_bounds_timedelta64(self, constructor):
# td64 scalar holding int64-max days; judging by the test name this is
# meant to be out of bounds even at second resolution (TODO confirm)
scalar = np.timedelta64(np.iinfo(np.int64).max, "D")
result = constructor(scalar)
item = get1(result)
# the stored item is expected to stay a raw np.timedelta64 ...
assert type(item) is np.timedelta64
dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0]
# ... and the Series/column dtype is expected to be object
assert dtype == object

def test_tzaware_data_tznaive_dtype(self, constructor):
tz = "US/Eastern"
Expand Down
Loading