Skip to content

Commit c65f8f8

Browse files
committed
Make common impl. with Index.searchsorted
1 parent 9910278 commit c65f8f8

File tree

3 files changed

+101
-19
lines changed

3 files changed

+101
-19
lines changed

pandas/core/base.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1497,8 +1497,12 @@ def factorize(self, sort=False, na_sentinel=-1):
14971497
@Substitution(klass='IndexOpsMixin')
14981498
@Appender(_shared_docs['searchsorted'])
14991499
def searchsorted(self, value, side='left', sorter=None):
1500-
# needs coercion on the key (DatetimeIndex does already)
1501-
return self._values.searchsorted(value, side=side, sorter=sorter)
1500+
result = com.searchsorted(self._values, value,
1501+
side=side, sorter=sorter)
1502+
1503+
if is_scalar(value):
1504+
return result if is_scalar(result) else result[0]
1505+
return result
15021506

15031507
def drop_duplicates(self, keep='first', inplace=False):
15041508
inplace = validate_bool_kwarg(inplace, 'inplace')

pandas/core/common.py

Lines changed: 89 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,16 @@
1212
import numpy as np
1313

1414
from pandas._libs import lib, tslibs
15-
import pandas.compat as compat
16-
from pandas.compat import PY36, OrderedDict, iteritems
17-
1815
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
19-
from pandas.core.dtypes.common import (
20-
is_array_like, is_bool_dtype, is_extension_array_dtype, is_integer)
21-
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
16+
from pandas import compat
17+
from pandas.compat import iteritems, PY2, PY36, OrderedDict
18+
from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCIndexClass
19+
from pandas.core.dtypes.common import (is_integer, is_integer_dtype,
20+
is_bool_dtype, is_extension_array_dtype,
21+
is_array_like,
22+
is_float_dtype, is_object_dtype,
23+
is_categorical_dtype, is_numeric_dtype,
24+
is_scalar, ensure_platform_int)
2225
from pandas.core.dtypes.inference import _iterable_not_string
2326
from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
2427

@@ -481,3 +484,83 @@ def f(x):
481484
f = mapper
482485

483486
return f
487+
488+
489+
def ensure_integer_dtype(arr, value):
    """
    Ensure optimal dtype for :func:`searchsorted_integer` is returned.

    Parameters
    ----------
    arr : a numpy integer array
    value : a number or array of numbers

    Returns
    -------
    dtype : a numpy dtype
        ``arr.dtype`` when every element of `value` lies within the range
        representable by ``arr.dtype``; otherwise the dtype numpy infers
        for `value` (which is not necessarily an integer dtype).

    Raises
    ------
    TypeError : if value is not numeric (checked explicitly on Python 2)
    """
    # Wrap scalars in a list so the range comparisons below always
    # operate on an array.
    value_arr = np.array([value]) if is_scalar(value) else np.array(value)

    if PY2 and not is_numeric_dtype(value_arr):
        # python 2 allows "a" < 1, avoid such nonsense
        msg = "value must be numeric, was type {}"
        # report the type, as the message promises, not the value itself
        raise TypeError(msg.format(type(value).__name__))

    iinfo = np.iinfo(arr.dtype)
    out_of_range = ((value_arr < iinfo.min).any() or
                    (value_arr > iinfo.max).any())
    # Casting to arr.dtype is only safe when nothing would overflow;
    # otherwise keep the dtype numpy inferred for value.
    return value_arr.dtype if out_of_range else arr.dtype
518+
519+
520+
def searchsorted_integer(arr, value, side="left", sorter=None):
    """
    Integer-array-only variant of searchsorted.

    numpy's searchsorted is faster when `arr` and `value` share a dtype,
    so integral values are cast to the dtype chosen by
    :func:`ensure_integer_dtype` before searching.

    See :func:`searchsorted` for a more general searchsorted implementation.
    """
    if sorter is not None:
        sorter = ensure_platform_int(sorter)

    target_dtype = ensure_integer_dtype(arr, value)

    castable = (
        is_integer(value)
        or is_integer_dtype(value)
        # a float such as 2.0 may be cast to int 2 for better speed,
        # whereas a float such as 2.2 must *not* be truncated to int 2
        or (hasattr(value, 'is_integer') and value.is_integer())
    )
    if castable:
        value = np.asarray(value, dtype=target_dtype)

    return np.searchsorted(arr, value, side=side, sorter=sorter)
541+
542+
543+
def searchsorted(arr, value, side="left", sorter=None):
    """
    Find indices where elements should be inserted to maintain order.

    Locate the positions in the sorted array-like `arr` at which the
    corresponding elements of `value` could be inserted while keeping
    `arr` ordered.

    See :class:`IndexOpsMixin.searchsorted` for more details and examples.
    """
    if sorter is not None:
        sorter = ensure_platform_int(sorter)

    # integer arrays take the dtype-matching fast path
    if is_integer_dtype(arr):
        return searchsorted_integer(arr, value, side=side, sorter=sorter)

    searchable_as_is = (is_object_dtype(arr) or is_float_dtype(arr) or
                        is_categorical_dtype(arr))
    if not searchable_as_is:
        # fallback solution. E.g. arr is an array with dtype='datetime64[ns]'
        # and value is a pd.Timestamp, need to convert value
        from pandas.core.series import Series
        value = Series(value)._values

    return arr.searchsorted(value, side=side, sorter=sorter)

pandas/core/series.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2331,17 +2331,12 @@ def __rmatmul__(self, other):
23312331
@Substitution(klass='Series')
23322332
@Appender(base._shared_docs['searchsorted'])
23332333
def searchsorted(self, value, side='left', sorter=None):
2334-
if sorter is not None:
2335-
sorter = ensure_platform_int(sorter)
2336-
if not is_extension_type(self._values):
2337-
# numpy searchsorted is only fast if value is of same dtype as the
2338-
# searched array. Below we ensure that value has the right dtype,
2339-
# and is not 0-dimensional.
2340-
value = np.asarray(value, dtype=self._values.dtype)
2341-
value = value[..., np.newaxis] if value.ndim == 0 else value
2342-
2343-
result = self._values.searchsorted(value, side=side, sorter=sorter)
2344-
return result[0] if is_scalar(value) else result
2334+
result = com.searchsorted(self._values, value,
2335+
side=side, sorter=sorter)
2336+
2337+
if is_scalar(value):
2338+
return result if is_scalar(result) else result[0]
2339+
return result
23452340

23462341
# -------------------------------------------------------------------
23472342
# Combination

0 commit comments

Comments
 (0)