14
14
from pandas ._libs import lib , tslibs
15
15
from pandas .core .dtypes .cast import construct_1d_object_array_from_listlike
16
16
from pandas import compat
17
- from pandas .compat import iteritems , PY2 , PY36 , OrderedDict
17
+ from pandas .compat import iteritems , PY36 , OrderedDict
18
18
from pandas .core .dtypes .generic import ABCSeries , ABCIndex , ABCIndexClass
19
- from pandas .core .dtypes .common import (is_integer , is_integer_dtype ,
20
- is_bool_dtype , is_extension_array_dtype ,
21
- is_array_like ,
22
- is_float_dtype , is_object_dtype ,
23
- is_categorical_dtype , is_numeric_dtype ,
24
- is_scalar , ensure_platform_int )
19
+ from pandas .core .dtypes .common import (
20
+ is_integer , is_integer_dtype , is_bool_dtype ,
21
+ is_extension_array_dtype , is_array_like , is_object_dtype ,
22
+ is_categorical_dtype , is_numeric_dtype , is_scalar , ensure_platform_int )
25
23
from pandas .core .dtypes .inference import _iterable_not_string
26
24
from pandas .core .dtypes .missing import isna , isnull , notnull # noqa
27
25
@@ -486,58 +484,47 @@ def f(x):
486
484
return f
487
485
488
486
489
- def ensure_integer_dtype (arr , value ):
487
+ def searchsorted_integer (arr , value , side = "left" , sorter = None ):
490
488
"""
491
- Ensure optimal dtype for :func:`searchsorted_integer` is returned.
489
+ searchsorted implementation for searching integer arrays.
490
+
491
+ We get a speedup if we ensure the dtype of arr and value are the same
492
+ (if possible) before searchingm as numpy implicitly converts the dtypes
493
+ if they're different, which would cause a slowdown.
494
+
495
+ See :func:`searchsorted` for a more general searchsorted implementation.
492
496
493
497
Parameters
494
498
----------
495
- arr : a numpy integer array
496
- value : a number or array of numbers
499
+ arr : numpy.array
500
+ a numpy array of integers
501
+ value : int or numpy.array
502
+ an integer or an array of integers that we want to find the
503
+ location(s) for in `arr`
504
+ side : str
505
+ One of {'left', 'right'}
506
+ sorter : numpy.array, optional
497
507
498
508
Returns
499
509
-------
500
- dtype : an numpy integer dtype
501
-
502
- Raises
503
- ------
504
- TypeError : if value is not a number
505
- """
506
- value_arr = np .array ([value ]) if is_scalar (value ) else np .array (value )
507
-
508
- if PY2 and not is_numeric_dtype (value_arr ):
509
- # python 2 allows "a" < 1, avoid such nonsense
510
- msg = "value must be numeric, was type {}"
511
- raise TypeError (msg .format (value ))
512
-
513
- iinfo = np .iinfo (arr .dtype )
514
- if not ((value_arr < iinfo .min ).any () or (value_arr > iinfo .max ).any ()):
515
- return arr .dtype
516
- else :
517
- return value_arr .dtype
518
-
519
-
520
- def searchsorted_integer (arr , value , side = "left" , sorter = None ):
521
- """
522
- searchsorted implementation, but only for integer arrays.
523
-
524
- We get a speedup if the dtype of arr and value is the same.
525
-
526
- See :func:`searchsorted` for a more general searchsorted implementation.
510
+ int or numpy.array
511
+ The locations(s) of `value` in `arr`.
527
512
"""
528
513
if sorter is not None :
529
514
sorter = ensure_platform_int (sorter )
530
515
531
- dtype = ensure_integer_dtype (arr , value )
532
-
533
- if is_integer (value ) or is_integer_dtype (value ):
534
- value = np .asarray (value , dtype = dtype )
535
- elif hasattr (value , 'is_integer' ) and value .is_integer ():
536
- # float 2.0 can be converted to int 2 for better speed,
537
- # but float 2.2 should *not* be converted to int 2
538
- value = np .asarray (value , dtype = dtype )
516
+ # below we try to give `value` the same dtype as `arr`, while guarding
517
+ # against integer overflows. If the value of `value` is outside of the
518
+ # bound of `arr`, `arr` would be recast by numpy, causing a slower search.
519
+ value_arr = np .array ([value ]) if is_scalar (value ) else np .array (value )
520
+ iinfo = np .iinfo (arr .dtype )
521
+ if (value_arr >= iinfo .min ).all () and (value_arr <= iinfo .max ).all ():
522
+ dtype = arr .dtype
523
+ else :
524
+ dtype = value_arr .dtype
525
+ value = np .asarray (value , dtype = dtype )
539
526
540
- return np .searchsorted (arr , value , side = side , sorter = sorter )
527
+ return arr .searchsorted (value , side = side , sorter = sorter )
541
528
542
529
543
530
def searchsorted (arr , value , side = "left" , sorter = None ):
@@ -549,18 +536,30 @@ def searchsorted(arr, value, side="left", sorter=None):
549
536
the order of `arr` would be preserved.
550
537
551
538
See :class:`IndexOpsMixin.searchsorted` for more details and examples.
539
+
540
+ Parameters
541
+ ----------
542
+ arr : numpy.array or ExtensionArray
543
+ value : scalar or numpy.array
544
+ side : str
545
+ One of {'left', 'right'}
546
+ sorter : numpy.array, optional
547
+
548
+ Returns
549
+ -------
550
+ int or numpy.array
551
+ The locations(s) of `value` in `arr`.
552
552
"""
553
553
if sorter is not None :
554
554
sorter = ensure_platform_int (sorter )
555
555
556
- if is_integer_dtype (arr ):
556
+ if is_integer_dtype (arr ) and (
557
+ is_integer (value ) or is_integer_dtype (value )):
557
558
return searchsorted_integer (arr , value , side = side , sorter = sorter )
558
- elif (is_object_dtype (arr ) or is_float_dtype (arr ) or
559
- is_categorical_dtype (arr )):
560
- return arr .searchsorted (value , side = side , sorter = sorter )
561
- else :
562
- # fallback solution. E.g. arr is an array with dtype='datetime64[ns]'
563
- # and value is a pd.Timestamp, need to convert value
559
+ if not (is_object_dtype (arr ) or is_numeric_dtype (arr ) or
560
+ is_categorical_dtype (arr )):
561
+ # E.g. if `arr` is an array with dtype='datetime64[ns]'
562
+ # and `value` is a pd.Timestamp, we may need to convert value
564
563
from pandas .core .series import Series
565
564
value = Series (value )._values
566
- return arr .searchsorted (value , side = side , sorter = sorter )
565
+ return arr .searchsorted (value , side = side , sorter = sorter )
0 commit comments