Skip to content

Commit d6eace7

Browse files
committed
Merge pull request #7684 from jreback/perf
PERF: better perf on min/max on indices not containing NaT for DatetimeIndex/PeriodIndex
2 parents 17e2ea3 + 0d3c74c commit d6eace7

File tree

4 files changed

+33
-14
lines changed

4 files changed

+33
-14
lines changed

pandas/core/base.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from pandas.core import common as com
77
import pandas.core.nanops as nanops
88
import pandas.tslib as tslib
9-
9+
from pandas.util.decorators import cache_readonly
1010

1111
class StringMixin(object):
1212

@@ -401,6 +401,11 @@ def _box_values(self, values):
401401
import pandas.lib as lib
402402
return lib.map_infer(values, self._box_func)
403403

404+
@cache_readonly
405+
def hasnans(self):
406+
""" return if I have any nans; enables various perf speedups """
407+
return (self.asi8 == tslib.iNaT).any()
408+
404409
@property
405410
def asobject(self):
406411
from pandas.core.index import Index
@@ -417,11 +422,18 @@ def min(self, axis=None):
417422
Overridden ndarray.min to return an object
418423
"""
419424
try:
420-
mask = self.asi8 == tslib.iNaT
421-
if mask.any():
425+
i8 = self.asi8
426+
427+
# quick check
428+
if len(i8) and self.is_monotonic:
429+
if i8[0] != tslib.iNaT:
430+
return self._box_func(i8[0])
431+
432+
if self.hasnans:
433+
mask = i8 == tslib.iNaT
422434
min_stamp = self[~mask].asi8.min()
423435
else:
424-
min_stamp = self.asi8.min()
436+
min_stamp = i8.min()
425437
return self._box_func(min_stamp)
426438
except ValueError:
427439
return self._na_value
@@ -431,11 +443,18 @@ def max(self, axis=None):
431443
Overridden ndarray.max to return an object
432444
"""
433445
try:
434-
mask = self.asi8 == tslib.iNaT
435-
if mask.any():
446+
i8 = self.asi8
447+
448+
# quick check
449+
if len(i8) and self.is_monotonic:
450+
if i8[-1] != tslib.iNaT:
451+
return self._box_func(i8[-1])
452+
453+
if self.hasnans:
454+
mask = i8 == tslib.iNaT
436455
max_stamp = self[~mask].asi8.max()
437456
else:
438-
max_stamp = self.asi8.max()
457+
max_stamp = i8.max()
439458
return self._box_func(max_stamp)
440459
except ValueError:
441460
return self._na_value

pandas/core/index.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2072,7 +2072,7 @@ def __contains__(self, other):
20722072

20732073
try:
20742074
# if other is a sequence this throws a ValueError
2075-
return np.isnan(other) and self._hasnans
2075+
return np.isnan(other) and self.hasnans
20762076
except ValueError:
20772077
try:
20782078
return len(other) <= 1 and _try_get_item(other) in self
@@ -2109,7 +2109,7 @@ def _isnan(self):
21092109
return np.isnan(self.values)
21102110

21112111
@cache_readonly
2112-
def _hasnans(self):
2112+
def hasnans(self):
21132113
return self._isnan.any()
21142114

21152115
@cache_readonly

pandas/lib.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -958,7 +958,7 @@ def is_lexsorted(list list_of_arrays):
958958
@cython.boundscheck(False)
959959
@cython.wraparound(False)
960960
def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
961-
object closed='left'):
961+
object closed='left', bint hasnans=0):
962962
"""
963963
Int64 (datetime64) version of generic python version in groupby.py
964964
"""
@@ -968,9 +968,9 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
968968
int64_t l_bin, r_bin, nat_count
969969
bint right_closed = closed == 'right'
970970

971-
mask = values == iNaT
972971
nat_count = 0
973-
if mask.any():
972+
if hasnans:
973+
mask = values == iNaT
974974
nat_count = np.sum(mask)
975975
values = values[~mask]
976976

pandas/tseries/resample.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def _get_time_bins(self, ax):
174174
binner, bin_edges = self._adjust_bin_edges(binner, ax_values)
175175

176176
# general version, knowing nothing about relative frequencies
177-
bins = lib.generate_bins_dt64(ax_values, bin_edges, self.closed)
177+
bins = lib.generate_bins_dt64(ax_values, bin_edges, self.closed, hasnans=ax.hasnans)
178178

179179
if self.closed == 'right':
180180
labels = binner
@@ -188,7 +188,7 @@ def _get_time_bins(self, ax):
188188
elif not trimmed:
189189
labels = labels[:-1]
190190

191-
if (ax_values == tslib.iNaT).any():
191+
if ax.hasnans:
192192
binner = binner.insert(0, tslib.NaT)
193193
labels = labels.insert(0, tslib.NaT)
194194

0 commit comments

Comments
 (0)