Skip to content

Commit 1e13e98

Browse files
tptopper-123
tp
authored and committed
improve performance of Series.searchsorted
1 parent f75a220 commit 1e13e98

File tree

3 files changed

+25
-3
lines changed

3 files changed

+25
-3
lines changed

asv_bench/benchmarks/series_methods.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,25 @@ def time_dropna(self, dtype):
124124
self.s.dropna()
125125

126126

127+
class SearchSorted(object):
    """Benchmark ``Series.searchsorted`` for each dtype listed in ``params``.

    The Series under test holds N ones, N twos and N threes (already
    sorted), cast to the parametrized dtype. The searched key is the value
    2, or the string '2' for the 'str' parametrization.
    """

    goal_time = 0.2
    params = ['int8', 'int16', 'int32', 'int64',
              'uint8', 'uint16', 'uint32', 'uint64',
              'float16', 'float32', 'float64',
              'str']
    param_names = ['dtype']

    def setup(self, dtype):
        """Build the sorted Series and the search key for ``dtype``."""
        N = 10**5
        data = np.array([1] * N + [2] * N + [3] * N).astype(dtype)
        self.s = Series(data)
        # Pick the key here rather than in the timed method so that the
        # benchmark body contains nothing but the searchsorted call.
        self.key = '2' if dtype == 'str' else 2

    def time_searchsorted(self, dtype):
        """Time a single-key ``searchsorted`` on the prepared Series."""
        self.s.searchsorted(self.key)
144+
145+
127146
class Map(object):
128147

129148
params = ['dict', 'Series']

doc/source/whatsnew/v0.25.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ Performance Improvements
6363

6464
- Significant speedup in `SparseArray` initialization that benefits most operations, fixing performance regression introduced in v0.20.0 (:issue:`24985`)
6565
- `DataFrame.to_stata()` is now faster when outputting data with any string or non-native endian columns (:issue:`25045`)
66-
-
66+
- Improved performance of :meth:`Series.searchsorted`. The speedup is especially large when the dtype is int8/int16/int32 and the searched key is within
67+
the integer bounds for the dtype (:issue:`22034`)
6768

6869

6970
.. _whatsnew_0250.bug_fixes:

pandas/core/series.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2333,9 +2333,11 @@ def __rmatmul__(self, other):
23332333
def searchsorted(self, value, side='left', sorter=None):
    # Find insertion indices for `value` in this Series' underlying values.
    #
    # value  : scalar or array-like key(s) to locate.
    # side   : forwarded unchanged to the underlying searchsorted call.
    # sorter : optional indices; coerced with ensure_platform_int when
    #          given, then forwarded unchanged.
    #
    # Returns the first element of the result when `value` was passed as a
    # scalar, otherwise the full result of the underlying call.
    if sorter is not None:
        sorter = ensure_platform_int(sorter)

    # Record scalar-ness *before* `value` is rebound to an ndarray below;
    # testing is_scalar() after the cast would always be False and a
    # scalar key would come back as a 1-element array.
    scalar_key = is_scalar(value)

    if not is_extension_type(self._values):
        # Give the key the same dtype as the searched values before the
        # search is performed.
        # NOTE(review): this cast is lossy for keys the dtype cannot
        # represent (e.g. a float key such as 2.5 against an integer
        # Series is truncated) -- confirm whether a bounds/exactness
        # check is needed here.
        value = np.asarray(value, dtype=self._values.dtype)
        if value.ndim == 0:
            # Promote a 0-d key to 1-d so the result can be indexed.
            value = value[..., np.newaxis]

    result = self._values.searchsorted(value, side=side, sorter=sorter)
    return result[0] if scalar_key else result
23402342

23412343
# -------------------------------------------------------------------

0 commit comments

Comments
 (0)