-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
BUG/REF: TimedeltaIndex.__new__ #23539
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
afc2d30
e4b06ca
231a5c1
1ff432b
645e99c
9c89746
ef3f277
b20eda9
3f76c02
9d79205
d73bee6
f39b806
ccc7fcf
6fda27e
c63796a
e9b5da6
b43e936
da6b286
898444f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,11 +8,16 @@ | |
is_float, | ||
is_list_like, | ||
is_scalar, | ||
is_integer_dtype, | ||
is_float_dtype, | ||
is_object_dtype, | ||
is_string_dtype, | ||
is_timedelta64_dtype, | ||
is_timedelta64_ns_dtype, | ||
pandas_dtype, | ||
ensure_int64) | ||
from pandas.core.dtypes.missing import isna | ||
from pandas.core.dtypes.generic import ABCSeries | ||
|
||
from pandas.core.arrays.timedeltas import ( | ||
TimedeltaArrayMixin, _is_convertible_to_td, _to_m8) | ||
|
@@ -35,7 +40,7 @@ | |
from pandas.core.tools.timedeltas import ( | ||
to_timedelta, _coerce_scalar_to_timedelta_type) | ||
from pandas._libs import (lib, index as libindex, | ||
join as libjoin, Timedelta, NaT) | ||
join as libjoin, Timedelta, NaT, iNaT) | ||
from pandas._libs.tslibs.timedeltas import array_to_timedelta64 | ||
|
||
|
||
|
@@ -139,12 +144,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, | |
periods=None, closed=None, dtype=None, copy=False, | ||
name=None, verify_integrity=True): | ||
|
||
if isinstance(data, TimedeltaIndex) and freq is None and name is None: | ||
if copy: | ||
return data.copy() | ||
else: | ||
return data._shallow_copy() | ||
|
||
freq, freq_infer = dtl.maybe_infer_freq(freq) | ||
|
||
if data is None: | ||
|
@@ -154,32 +153,73 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, | |
result.name = name | ||
return result | ||
|
||
if unit is not None: | ||
data = to_timedelta(data, unit=unit, box=False) | ||
|
||
if is_scalar(data): | ||
raise ValueError('TimedeltaIndex() must be called with a ' | ||
raise ValueError('{cls}() must be called with a ' | ||
'collection of some kind, {data} was passed' | ||
.format(data=repr(data))) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
.format(cls=cls.__name__, data=repr(data))) | ||
|
||
# convert if not already | ||
if getattr(data, 'dtype', None) != _TD_DTYPE: | ||
if isinstance(data, TimedeltaIndex) and freq is None and name is None: | ||
if copy: | ||
return data.copy() | ||
else: | ||
return data._shallow_copy() | ||
|
||
# - Cases checked above all return/raise before reaching here - # | ||
|
||
if unit is not None: | ||
data = to_timedelta(data, unit=unit, box=False) | ||
elif copy: | ||
data = np.array(data, copy=True) | ||
|
||
data = np.array(data, copy=False) | ||
if data.dtype == np.object_: | ||
data = array_to_timedelta64(data) | ||
if data.dtype != _TD_DTYPE: | ||
if is_timedelta64_dtype(data): | ||
|
||
# Unwrap whatever we have into a np.ndarray | ||
if not hasattr(data, 'dtype'): | ||
# e.g. list, tuple | ||
if np.ndim(data) == 0: | ||
# i.e. generator | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
data = list(data) | ||
data = np.array(data, copy=False) | ||
elif isinstance(data, ABCSeries): | ||
data = data._values | ||
elif isinstance(data, (cls, TimedeltaArrayMixin)): | ||
data = data._data | ||
|
||
# Convert whatever we have into timedelta64[ns] dtype | ||
if is_object_dtype(data) or is_string_dtype(data): | ||
# no need to make a copy, need to convert if string-dtyped | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. why would you check is_string_dtype? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. because we have several tests that specifically pass e.g. |
||
data = np.array(data, dtype=np.object_, copy=False) | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
data = array_to_timedelta64(data).view(_TD_DTYPE) | ||
copy = False | ||
elif is_integer_dtype(data): | ||
# treat as nanoseconds | ||
# if something other than int64, convert | ||
data = ensure_int64(data) | ||
if copy: | ||
# TODO: can we avoid branching here? `astype(data, copy=False)` | ||
# appears to be making a copy | ||
data = data.astype(_TD_DTYPE) | ||
copy = False | ||
else: | ||
data = data.view(_TD_DTYPE) | ||
elif is_float_dtype(data): | ||
# We allow it if and only if it can be converted losslessly | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
mask = np.isnan(data) | ||
casted = data.astype(np.int64) | ||
if not (casted[~mask] == data[~mask]).all(): | ||
raise TypeError("floating-dtype data cannot be losslessly " | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"converted to {cls}".format(cls=cls.__name__)) | ||
data = casted.view(_TD_DTYPE) | ||
data[mask] = iNaT | ||
copy = False | ||
elif is_timedelta64_dtype(data): | ||
if data.dtype != _TD_DTYPE: | ||
# non-nano unit | ||
# TODO: watch out for overflows | ||
data = data.astype(_TD_DTYPE) | ||
else: | ||
data = ensure_int64(data).view(_TD_DTYPE) | ||
copy = False | ||
else: | ||
raise TypeError("dtype {dtype} is invalid for constructing {cls}" | ||
.format(dtype=data.dtype, cls=cls.__name__)) | ||
|
||
assert data.dtype == 'm8[ns]', data.dtype | ||
data = np.array(data, copy=copy) | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
assert data.dtype == 'm8[ns]', data | ||
|
||
subarr = cls._simple_new(data, name=name, freq=freq) | ||
# check that we are matching freqs | ||
|
@@ -188,9 +228,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, | |
cls._validate_frequency(subarr, freq) | ||
|
||
if freq_infer: | ||
inferred = subarr.inferred_freq | ||
if inferred: | ||
subarr.freq = to_offset(inferred) | ||
subarr.freq = to_offset(subarr.inferred_freq) | ||
|
||
return subarr | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1054,11 +1054,11 @@ def test_tdi_mul_float_series(self, box_df_fail): | |
idx = tm.box_expected(idx, box) | ||
|
||
rng5f = np.arange(5, dtype='float64') | ||
expected = TimedeltaIndex(rng5f * (rng5f + 0.1)) | ||
expected = TimedeltaIndex(rng5f * (rng5f + 1.0)) | ||
box2 = pd.Series if box is pd.Index else box | ||
expected = tm.box_expected(expected, box2) | ||
|
||
result = idx * Series(rng5f + 0.1) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jbrockmendel why did you change this? Left-over from initially changing the behaviour on floats? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes. |
||
result = idx * Series(rng5f + 1.0) | ||
tm.assert_equal(result, expected) | ||
|
||
# TODO: Put Series/DataFrame in others? | ||
|
Uh oh!
There was an error while loading. Please reload this page.