Skip to content

API: properly box numeric timedelta ops on Series (GH4984) #4985

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 25, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ Improvements to existing features
is frequency conversion.
- Timedelta64 support ``fillna/ffill/bfill`` with an integer interpreted as seconds,
or a ``timedelta`` (:issue:`3371`)
- Box numeric ops on ``timedelta`` Series (:issue:`4984`)
- Datetime64 support ``ffill/bfill``
- Performance improvements with ``__getitem__`` on ``DataFrames``
when the key is a column
Expand Down
19 changes: 19 additions & 0 deletions doc/source/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,25 @@ pass a timedelta to get a particular value.
y.fillna(10)
y.fillna(timedelta(days=-1,seconds=5))

.. _timeseries.timedeltas_reductions:

Time Deltas & Reductions
------------------------

.. warning::

A numeric reduction operation for ``timedelta64[ns]`` will return a single-element ``Series`` of
dtype ``timedelta64[ns]``.

You can do numeric reduction operations on timedeltas.

.. ipython:: python

y2 = y.fillna(timedelta(days=-1,seconds=5))
y2
y2.mean()
y2.quantile(.1)

.. _timeseries.timedeltas_convert:

Time Deltas & Conversions
Expand Down
8 changes: 8 additions & 0 deletions doc/source/v0.13.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,14 @@ Enhancements
td.fillna(0)
td.fillna(timedelta(days=1,seconds=5))

- You can do numeric reduction operations on timedeltas. Note that these will return
a single-element Series.

.. ipython:: python

td.mean()
td.quantile(.1)

- ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and
``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set
the bandwidth, and to gkde.evaluate() to specify the indices at which it
Expand Down
14 changes: 11 additions & 3 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import numpy as np

from pandas.core.common import isnull, notnull, _values_from_object
from pandas.core.common import isnull, notnull, _values_from_object, is_float
import pandas.core.common as com
import pandas.lib as lib
import pandas.algos as algos
Expand Down Expand Up @@ -188,6 +188,10 @@ def _wrap_results(result,dtype):
# as series will do the right thing in py3 (and deal with numpy 1.6.2
# bug in that it results dtype of timedelta64[us]
from pandas import Series

# coerce float to results
if is_float(result):
result = int(result)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the float value here somehow guaranteed to not have a fractional part? seems strange to just smash it to int

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah just read your comment in the issue thread

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we'll have to revisit this in the future if a finer timescale is desired

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at worst it kills a fractional ns... and that is below our precision anyhow

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep

result = Series([result],dtype='timedelta64[ns]')
else:
result = result.view(dtype)
Expand Down Expand Up @@ -224,11 +228,15 @@ def nanmean(values, axis=None, skipna=True):
the_mean[ct_mask] = np.nan
else:
the_mean = the_sum / count if count > 0 else np.nan
return the_mean

return _wrap_results(the_mean,dtype)

@disallow('M8')
@bottleneck_switch()
def nanmedian(values, axis=None, skipna=True):

values, mask, dtype = _get_values(values, skipna)

def get_median(x):
mask = notnull(x)
if not skipna and not mask.all():
Expand Down Expand Up @@ -257,7 +265,7 @@ def get_median(x):
return ret

# otherwise return a scalar value
return get_median(values) if notempty else np.nan
return _wrap_results(get_median(values),dtype) if notempty else np.nan


@disallow('M8')
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1981,7 +1981,12 @@ def quantile(self, q=0.5):
valid_values = self.dropna().values
if len(valid_values) == 0:
return pa.NA
return _quantile(valid_values, q * 100)
result = _quantile(valid_values, q * 100)
if result.dtype == _TD_DTYPE:
from pandas.tseries.timedeltas import to_timedelta
return to_timedelta(result)

return result

def ptp(self, axis=None, out=None):
return _values_from_object(self).ptp(axis, out)
Expand Down
43 changes: 36 additions & 7 deletions pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
import pandas as pd

from pandas import (Index, Series, DataFrame, isnull, notnull,
from pandas import (Index, Series, DataFrame, Timestamp, isnull, notnull,
bdate_range, date_range, _np_version_under1p7)
import pandas.core.common as com
from pandas.compat import StringIO, lrange, range, zip, u, OrderedDict, long
Expand Down Expand Up @@ -123,8 +123,8 @@ def conv(v):
def test_nat_converters(self):
_skip_if_numpy_not_friendly()

self.assert_(to_timedelta('nat') == tslib.iNaT)
self.assert_(to_timedelta('nan') == tslib.iNaT)
self.assert_(to_timedelta('nat',box=False) == tslib.iNaT)
self.assert_(to_timedelta('nan',box=False) == tslib.iNaT)

def test_to_timedelta(self):
_skip_if_numpy_not_friendly()
Expand All @@ -133,11 +133,11 @@ def conv(v):
return v.astype('m8[ns]')
d1 = np.timedelta64(1,'D')

self.assert_(to_timedelta('1 days 06:05:01.00003') == conv(d1+np.timedelta64(6*3600+5*60+1,'s')+np.timedelta64(30,'us')))
self.assert_(to_timedelta('15.5us') == conv(np.timedelta64(15500,'ns')))
self.assert_(to_timedelta('1 days 06:05:01.00003',box=False) == conv(d1+np.timedelta64(6*3600+5*60+1,'s')+np.timedelta64(30,'us')))
self.assert_(to_timedelta('15.5us',box=False) == conv(np.timedelta64(15500,'ns')))

# empty string
result = to_timedelta('')
result = to_timedelta('',box=False)
self.assert_(result == tslib.iNaT)

result = to_timedelta(['', ''])
Expand All @@ -150,7 +150,7 @@ def conv(v):

# ints
result = np.timedelta64(0,'ns')
expected = to_timedelta(0)
expected = to_timedelta(0,box=False)
self.assert_(result == expected)

# Series
Expand All @@ -163,6 +163,35 @@ def conv(v):
expected = to_timedelta([0,10],unit='s')
tm.assert_series_equal(result, expected)

# single element conversion
v = timedelta(seconds=1)
result = to_timedelta(v,box=False)
expected = to_timedelta([v])

v = np.timedelta64(timedelta(seconds=1))
result = to_timedelta(v,box=False)
expected = to_timedelta([v])

def test_timedelta_ops(self):
_skip_if_numpy_not_friendly()

# GH4984
# make sure ops return timedeltas
s = Series([Timestamp('20130101') + timedelta(seconds=i*i) for i in range(10) ])
td = s.diff()

result = td.mean()
expected = to_timedelta(timedelta(seconds=9))
tm.assert_series_equal(result, expected)

result = td.quantile(.1)
expected = to_timedelta('00:00:02.6')
tm.assert_series_equal(result, expected)

result = td.median()
expected = to_timedelta('00:00:08')
tm.assert_series_equal(result, expected)

if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
2 changes: 1 addition & 1 deletion pandas/tseries/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _convert_listlike(arg, box):
elif is_list_like(arg):
return _convert_listlike(arg, box=box)

return _convert_listlike([ arg ], box=False)[0]
return _convert_listlike([ arg ], box=box)

_short_search = re.compile(
"^\s*(?P<neg>-?)\s*(?P<value>\d*\.?\d*)\s*(?P<unit>d|s|ms|us|ns)?\s*$",re.IGNORECASE)
Expand Down