Skip to content

Commit 0d3c74c

Browse files
committed
PERF: faster min/max/resample on DatetimeIndex/PeriodIndex when the index contains no NaT (GH7633)
1 parent e060616 commit 0d3c74c

File tree

4 files changed

+33
-14
lines changed

4 files changed

+33
-14
lines changed

pandas/core/base.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from pandas.core import common as com
77
import pandas.core.nanops as nanops
88
import pandas.tslib as tslib
9-
9+
from pandas.util.decorators import cache_readonly
1010

1111
class StringMixin(object):
1212

@@ -392,6 +392,11 @@ def _box_values(self, values):
392392
import pandas.lib as lib
393393
return lib.map_infer(values, self._box_func)
394394

395+
@cache_readonly
396+
def hasnans(self):
397+
""" Return True if any of my values is NaT (i.e. any ``asi8`` entry equals ``tslib.iNaT``); enables various perf speedups by letting callers skip NaT masking """
398+
return (self.asi8 == tslib.iNaT).any()
399+
395400
@property
396401
def asobject(self):
397402
from pandas.core.index import Index
@@ -408,11 +413,18 @@ def min(self, axis=None):
408413
Overridden ndarray.min to return an object
409414
"""
410415
try:
411-
mask = self.asi8 == tslib.iNaT
412-
if mask.any():
416+
i8 = self.asi8
417+
418+
# quick check
419+
if len(i8) and self.is_monotonic:
420+
if i8[0] != tslib.iNaT:
421+
return self._box_func(i8[0])
422+
423+
if self.hasnans:
424+
mask = i8 == tslib.iNaT
413425
min_stamp = self[~mask].asi8.min()
414426
else:
415-
min_stamp = self.asi8.min()
427+
min_stamp = i8.min()
416428
return self._box_func(min_stamp)
417429
except ValueError:
418430
return self._na_value
@@ -422,11 +434,18 @@ def max(self, axis=None):
422434
Overridden ndarray.max to return an object
423435
"""
424436
try:
425-
mask = self.asi8 == tslib.iNaT
426-
if mask.any():
437+
i8 = self.asi8
438+
439+
# quick check
440+
if len(i8) and self.is_monotonic:
441+
if i8[-1] != tslib.iNaT:
442+
return self._box_func(i8[-1])
443+
444+
if self.hasnans:
445+
mask = i8 == tslib.iNaT
427446
max_stamp = self[~mask].asi8.max()
428447
else:
429-
max_stamp = self.asi8.max()
448+
max_stamp = i8.max()
430449
return self._box_func(max_stamp)
431450
except ValueError:
432451
return self._na_value

pandas/core/index.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2072,7 +2072,7 @@ def __contains__(self, other):
20722072

20732073
try:
20742074
# if other is a sequence this throws a ValueError
2075-
return np.isnan(other) and self._hasnans
2075+
return np.isnan(other) and self.hasnans
20762076
except ValueError:
20772077
try:
20782078
return len(other) <= 1 and _try_get_item(other) in self
@@ -2109,7 +2109,7 @@ def _isnan(self):
21092109
return np.isnan(self.values)
21102110

21112111
@cache_readonly
2112-
def _hasnans(self):
2112+
def hasnans(self):
21132113
return self._isnan.any()
21142114

21152115
@cache_readonly

pandas/lib.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -958,7 +958,7 @@ def is_lexsorted(list list_of_arrays):
958958
@cython.boundscheck(False)
959959
@cython.wraparound(False)
960960
def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
961-
object closed='left'):
961+
object closed='left', bint hasnans=0):
962962
"""
963963
Int64 (datetime64) version of generic python version in groupby.py
964964
"""
@@ -968,9 +968,9 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
968968
int64_t l_bin, r_bin, nat_count
969969
bint right_closed = closed == 'right'
970970

971-
mask = values == iNaT
972971
nat_count = 0
973-
if mask.any():
972+
if hasnans:
973+
mask = values == iNaT
974974
nat_count = np.sum(mask)
975975
values = values[~mask]
976976

pandas/tseries/resample.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def _get_time_bins(self, ax):
174174
binner, bin_edges = self._adjust_bin_edges(binner, ax_values)
175175

176176
# general version, knowing nothing about relative frequencies
177-
bins = lib.generate_bins_dt64(ax_values, bin_edges, self.closed)
177+
bins = lib.generate_bins_dt64(ax_values, bin_edges, self.closed, hasnans=ax.hasnans)
178178

179179
if self.closed == 'right':
180180
labels = binner
@@ -188,7 +188,7 @@ def _get_time_bins(self, ax):
188188
elif not trimmed:
189189
labels = labels[:-1]
190190

191-
if (ax_values == tslib.iNaT).any():
191+
if ax.hasnans:
192192
binner = binner.insert(0, tslib.NaT)
193193
labels = labels.insert(0, tslib.NaT)
194194

0 commit comments

Comments
 (0)