Skip to content

BUG: ValueError raised by cummin/cummax when datetime64 Series contains NaT. #8966

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 3, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.15.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,4 @@ Bug Fixes
not lexically sorted or unique (:issue:`7724`)
- BUG CSV: fix problem with trailing whitespace in skipped rows, (:issue:`8679`), (:issue:`8661`)
- Regression in ``Timestamp`` does not parse 'Z' zone designator for UTC (:issue:`8771`)
- Fixed ValueError raised by cummin/cummax when datetime64 Series contains NaT. (:issue:`8965`)
14 changes: 9 additions & 5 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4112,13 +4112,17 @@ def func(self, axis=None, dtype=None, out=None, skipna=True,
axis = self._get_axis_number(axis)

y = _values_from_object(self).copy()
if not issubclass(y.dtype.type, (np.integer, np.bool_)):

if skipna and issubclass(y.dtype.type,
(np.datetime64, np.timedelta64)):
result = accum_func(y, axis)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think calling accum_func with NaTs present is okay because the NaTs are neither greater than nor less than any other datetime64:

In [6]: pd.NaT < np.datetime64(('2010-01-01'))
Out[6]: False

In [7]: np.datetime64(('2010-01-01')) < pd.NaT 
Out[7]: False

mask = isnull(self)
np.putmask(result, mask, pd.tslib.iNaT)
elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)):
mask = isnull(self)
if skipna:
np.putmask(y, mask, mask_a)
np.putmask(y, mask, mask_a)
result = accum_func(y, axis)
if skipna:
np.putmask(result, mask, mask_b)
np.putmask(result, mask, mask_b)
else:
result = accum_func(y, axis)

Expand Down
56 changes: 56 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2309,6 +2309,62 @@ def test_cummax(self):

self.assert_numpy_array_equal(result, expected)

def test_cummin_datetime64(self):
    """Check ``Series.cummin`` on datetime64 values interleaved with NaT.

    The input alternates NaT with three dates.  Two scenarios are
    asserted, in this order:

    * ``skipna=True``  -- NaT positions are expected to stay NaT while
      the running minimum is taken over the remaining dates.
    * ``skipna=False`` -- after the leading NaT, the expected output
      carries the running minimum through the NaT positions.
    """
    series = pd.Series(pd.to_datetime(
        ['NaT', '2000-1-2', 'NaT', '2000-1-1', 'NaT', '2000-1-3']))

    # (skipna flag, expected cumulative minimum) pairs.
    scenarios = (
        (True,
         ['NaT', '2000-1-2', 'NaT', '2000-1-1', 'NaT', '2000-1-1']),
        (False,
         ['NaT', '2000-1-2', '2000-1-2', '2000-1-1', '2000-1-1',
          '2000-1-1']),
    )
    for skip_flag, stamps in scenarios:
        expected = pd.Series(pd.to_datetime(stamps))
        result = series.cummin(skipna=skip_flag)
        self.assert_series_equal(expected, result)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback: s.cummin(skipna=False) is passing this test. Can you give an example where it fails?


def test_cummax_datetime64(self):
    """Check ``Series.cummax`` on datetime64 values interleaved with NaT.

    The input alternates NaT with three dates.  Two scenarios are
    asserted, in this order:

    * ``skipna=True``  -- NaT positions are expected to stay NaT while
      the running maximum is taken over the remaining dates.
    * ``skipna=False`` -- after the leading NaT, the expected output
      carries the running maximum through the NaT positions.
    """
    series = pd.Series(pd.to_datetime(
        ['NaT', '2000-1-2', 'NaT', '2000-1-1', 'NaT', '2000-1-3']))

    # (skipna flag, expected cumulative maximum) pairs.
    scenarios = (
        (True,
         ['NaT', '2000-1-2', 'NaT', '2000-1-2', 'NaT', '2000-1-3']),
        (False,
         ['NaT', '2000-1-2', '2000-1-2', '2000-1-2', '2000-1-2',
          '2000-1-3']),
    )
    for skip_flag, stamps in scenarios:
        expected = pd.Series(pd.to_datetime(stamps))
        result = series.cummax(skipna=skip_flag)
        self.assert_series_equal(expected, result)

def test_cummin_timedelta64(self):
    """Check ``Series.cummin`` on timedelta64 values interleaved with NaT.

    The input alternates NaT with three durations.  Two scenarios are
    asserted, in this order:

    * ``skipna=True``  -- NaT positions are expected to stay NaT while
      the running minimum is taken over the remaining durations.
    * ``skipna=False`` -- after the leading NaT, the expected output
      carries the running minimum through the NaT positions.
    """
    series = pd.Series(pd.to_timedelta(
        ['NaT', '2 min', 'NaT', '1 min', 'NaT', '3 min']))

    # (skipna flag, expected cumulative minimum) pairs.
    scenarios = (
        (True, ['NaT', '2 min', 'NaT', '1 min', 'NaT', '1 min']),
        (False, ['NaT', '2 min', '2 min', '1 min', '1 min', '1 min']),
    )
    for skip_flag, durations in scenarios:
        expected = pd.Series(pd.to_timedelta(durations))
        result = series.cummin(skipna=skip_flag)
        self.assert_series_equal(expected, result)

def test_cummax_timedelta64(self):
    """Check ``Series.cummax`` on timedelta64 values interleaved with NaT.

    The input alternates NaT with three durations.  Two scenarios are
    asserted, in this order:

    * ``skipna=True``  -- NaT positions are expected to stay NaT while
      the running maximum is taken over the remaining durations.
    * ``skipna=False`` -- after the leading NaT, the expected output
      carries the running maximum through the NaT positions.
    """
    series = pd.Series(pd.to_timedelta(
        ['NaT', '2 min', 'NaT', '1 min', 'NaT', '3 min']))

    # (skipna flag, expected cumulative maximum) pairs.
    scenarios = (
        (True, ['NaT', '2 min', 'NaT', '2 min', 'NaT', '3 min']),
        (False, ['NaT', '2 min', '2 min', '2 min', '2 min', '3 min']),
    )
    for skip_flag, durations in scenarios:
        expected = pd.Series(pd.to_timedelta(durations))
        result = series.cummax(skipna=skip_flag)
        self.assert_series_equal(expected, result)

def test_npdiff(self):
raise nose.SkipTest("skipping due to Series no longer being an "
"ndarray")
Expand Down