-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REF: strictness and checks for Timedelta _simple_new #23433
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,7 @@ | |
from pandas import compat | ||
|
||
from pandas.core.dtypes.common import ( | ||
_TD_DTYPE, ensure_int64, is_timedelta64_dtype, is_list_like) | ||
_TD_DTYPE, is_list_like) | ||
from pandas.core.dtypes.generic import ABCSeries | ||
from pandas.core.dtypes.missing import isna | ||
|
||
|
@@ -111,16 +111,16 @@ def dtype(self): | |
_attributes = ["freq"] | ||
|
||
@classmethod | ||
def _simple_new(cls, values, freq=None, **kwargs): | ||
values = np.array(values, copy=False) | ||
if values.dtype == np.object_: | ||
values = array_to_timedelta64(values) | ||
if values.dtype != _TD_DTYPE: | ||
if is_timedelta64_dtype(values): | ||
# non-nano unit | ||
values = values.astype(_TD_DTYPE) | ||
else: | ||
values = ensure_int64(values).view(_TD_DTYPE) | ||
def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): | ||
# `dtype` is passed by _shallow_copy in corner cases, should always | ||
# be timedelta64[ns] if present | ||
assert dtype == _TD_DTYPE | ||
assert isinstance(values, np.ndarray), type(values) | ||
|
||
if values.dtype == 'i8': | ||
values = values.view('m8[ns]') | ||
|
||
assert values.dtype == 'm8[ns]' | ||
|
||
result = object.__new__(cls) | ||
result._data = values | ||
|
@@ -131,6 +131,10 @@ def __new__(cls, values, freq=None): | |
|
||
freq, freq_infer = dtl.maybe_infer_freq(freq) | ||
|
||
values = np.array(values, copy=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So why do you need to accept object dtype here? (You are also checking for this in TDI.__new__.) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ATM we are checking for it in […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, I assume this is on the list to de-duplicate. |
||
if values.dtype == np.object_: | ||
values = array_to_timedelta64(values) | ||
|
||
result = cls._simple_new(values, freq=freq) | ||
if freq_infer: | ||
inferred = result.inferred_freq | ||
|
@@ -166,17 +170,15 @@ def _generate_range(cls, start, end, periods, freq, closed=None): | |
|
||
if freq is not None: | ||
index = _generate_regular_range(start, end, periods, freq) | ||
index = cls._simple_new(index, freq=freq) | ||
else: | ||
index = np.linspace(start.value, end.value, periods).astype('i8') | ||
index = cls._simple_new(index, freq=freq) | ||
|
||
if not left_closed: | ||
index = index[1:] | ||
if not right_closed: | ||
index = index[:-1] | ||
|
||
return index | ||
return cls._simple_new(index, freq=freq) | ||
|
||
# ---------------------------------------------------------------- | ||
# Arithmetic Methods | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,6 +35,7 @@ | |
to_timedelta, _coerce_scalar_to_timedelta_type) | ||
from pandas._libs import (lib, index as libindex, | ||
join as libjoin, Timedelta, NaT) | ||
from pandas._libs.tslibs.timedeltas import array_to_timedelta64 | ||
|
||
|
||
class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin, | ||
|
@@ -166,6 +167,19 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, | |
elif copy: | ||
data = np.array(data, copy=True) | ||
|
||
data = np.array(data, copy=False) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if data.dtype == np.object_: | ||
data = array_to_timedelta64(data) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are these checks NOT done in _simple_new? This is inconsistent with other code. We should be really, really clear on what is acceptable in _simple_new vs. what is not. IIRC, in another of your PRs you did checks on object dtype in _simple_new, for example. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. AFAICT the current verbose checking is largely driven by the weird cases (that these PRs get rid of) where […]. This and the associated DatetimeIndex PR impose a simple/strict API for _simple_new: it expects an np.ndarray that may be either i8 or M8[ns]/m8[ns]. |
||
if data.dtype != _TD_DTYPE: | ||
if is_timedelta64_dtype(data): | ||
# non-nano unit | ||
# TODO: watch out for overflows | ||
data = data.astype(_TD_DTYPE) | ||
else: | ||
data = ensure_int64(data).view(_TD_DTYPE) | ||
|
||
assert data.dtype == 'm8[ns]', data.dtype | ||
|
||
subarr = cls._simple_new(data, name=name, freq=freq) | ||
# check that we are matching freqs | ||
if verify_integrity and len(subarr) > 0: | ||
|
@@ -180,12 +194,23 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, | |
return subarr | ||
|
||
@classmethod | ||
def _simple_new(cls, values, name=None, freq=None, **kwargs): | ||
result = super(TimedeltaIndex, cls)._simple_new(values, freq, **kwargs) | ||
def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): | ||
# `dtype` is passed by _shallow_copy in corner cases, should always | ||
# be timedelta64[ns] if present | ||
assert dtype == _TD_DTYPE | ||
|
||
assert isinstance(values, np.ndarray), type(values) | ||
if values.dtype == 'i8': | ||
values = values.view('m8[ns]') | ||
assert values.dtype == 'm8[ns]', values.dtype | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same questions for all your assert statements:
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The question of testing is actually more general and applies to all of these changes. Even though the change has been labeled as internal, I'm not sure whether any of these edits will surface to users in any way. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
(Moving your responses to the conversation bubble in the UI; just an organizational thing.) @jbrockmendel: What I was wondering was whether we could trigger these […]. It might be tricky if these edits are purely internal, and if it is too difficult, it's not a big deal. I'm asking just out of curiosity, since tests are good if we can have them. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see. Yah, we could […] |
||
|
||
result = super(TimedeltaIndex, cls)._simple_new(values, freq) | ||
result.name = name | ||
result._reset_identity() | ||
return result | ||
|
||
_shallow_copy = Index._shallow_copy | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
@property | ||
def _formatter_func(self): | ||
from pandas.io.formats.format import _get_format_timedelta64 | ||
|
Uh oh!
There was an error while loading. Please reload this page.