Skip to content

PERF: better perf on min/max on indices not containing NaT for DatetimeIndex/PeriodIndex #7684

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 7, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pandas.core import common as com
import pandas.core.nanops as nanops
import pandas.tslib as tslib

from pandas.util.decorators import cache_readonly

class StringMixin(object):

Expand Down Expand Up @@ -392,6 +392,11 @@ def _box_values(self, values):
import pandas.lib as lib
return lib.map_infer(values, self._box_func)

@cache_readonly
def hasnans(self):
""" return if I have any nans; enables various perf speedups """
return (self.asi8 == tslib.iNaT).any()

@property
def asobject(self):
from pandas.core.index import Index
Expand All @@ -408,11 +413,18 @@ def min(self, axis=None):
Overridden ndarray.min to return an object
"""
try:
mask = self.asi8 == tslib.iNaT
if mask.any():
i8 = self.asi8

# quick check
if len(i8) and self.is_monotonic:
if i8[0] != tslib.iNaT:
return self._box_func(i8[0])

if self.hasnans:
mask = i8 == tslib.iNaT
min_stamp = self[~mask].asi8.min()
else:
min_stamp = self.asi8.min()
min_stamp = i8.min()
return self._box_func(min_stamp)
except ValueError:
return self._na_value
Expand All @@ -422,11 +434,18 @@ def max(self, axis=None):
Overridden ndarray.max to return an object
"""
try:
mask = self.asi8 == tslib.iNaT
if mask.any():
i8 = self.asi8

# quick check
if len(i8) and self.is_monotonic:
if i8[-1] != tslib.iNaT:
return self._box_func(i8[-1])

if self.hasnans:
mask = i8 == tslib.iNaT
max_stamp = self[~mask].asi8.max()
else:
max_stamp = self.asi8.max()
max_stamp = i8.max()
return self._box_func(max_stamp)
except ValueError:
return self._na_value
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2072,7 +2072,7 @@ def __contains__(self, other):

try:
# if other is a sequence this throws a ValueError
return np.isnan(other) and self._hasnans
return np.isnan(other) and self.hasnans
except ValueError:
try:
return len(other) <= 1 and _try_get_item(other) in self
Expand Down Expand Up @@ -2109,7 +2109,7 @@ def _isnan(self):
return np.isnan(self.values)

@cache_readonly
def _hasnans(self):
def hasnans(self):
return self._isnan.any()

@cache_readonly
Expand Down
6 changes: 3 additions & 3 deletions pandas/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -958,7 +958,7 @@ def is_lexsorted(list list_of_arrays):
@cython.boundscheck(False)
@cython.wraparound(False)
def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
object closed='left'):
object closed='left', bint hasnans=0):
"""
Int64 (datetime64) version of generic python version in groupby.py
"""
Expand All @@ -968,9 +968,9 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
int64_t l_bin, r_bin, nat_count
bint right_closed = closed == 'right'

mask = values == iNaT
nat_count = 0
if mask.any():
if hasnans:
mask = values == iNaT
nat_count = np.sum(mask)
values = values[~mask]

Expand Down
4 changes: 2 additions & 2 deletions pandas/tseries/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def _get_time_bins(self, ax):
binner, bin_edges = self._adjust_bin_edges(binner, ax_values)

# general version, knowing nothing about relative frequencies
bins = lib.generate_bins_dt64(ax_values, bin_edges, self.closed)
bins = lib.generate_bins_dt64(ax_values, bin_edges, self.closed, hasnans=ax.hasnans)

if self.closed == 'right':
labels = binner
Expand All @@ -188,7 +188,7 @@ def _get_time_bins(self, ax):
elif not trimmed:
labels = labels[:-1]

if (ax_values == tslib.iNaT).any():
if ax.hasnans:
binner = binner.insert(0, tslib.NaT)
labels = labels.insert(0, tslib.NaT)

Expand Down