|
12 | 12 | import numpy as np
|
13 | 13 |
|
14 | 14 | from pandas._libs import lib, tslibs
|
| 15 | +from pandas.compat import PY36, OrderedDict, iteritems |
| 16 | + |
15 | 17 | from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
16 |
| -from pandas import compat |
17 |
| -from pandas.compat import iteritems, PY36, OrderedDict |
18 |
| -from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCIndexClass |
19 | 18 | from pandas.core.dtypes.common import (
|
20 |
| - is_integer, is_integer_dtype, is_bool_dtype, |
21 |
| - is_extension_array_dtype, is_array_like, is_object_dtype, |
22 |
| - is_categorical_dtype, is_numeric_dtype, is_scalar, ensure_platform_int) |
| 19 | + ensure_platform_int, is_array_like, is_bool_dtype, is_categorical_dtype, |
| 20 | + is_extension_array_dtype, is_integer, is_integer_dtype, is_numeric_dtype, |
| 21 | + is_object_dtype, is_scalar) |
| 22 | +from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries |
23 | 23 | from pandas.core.dtypes.inference import _iterable_not_string
|
24 | 24 | from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
|
25 | 25 |
|
| 26 | +from pandas import compat |
| 27 | + |
26 | 28 |
|
27 | 29 | class SettingWithCopyError(ValueError):
|
28 | 30 | pass
|
@@ -484,87 +486,79 @@ def f(x):
|
484 | 486 | return f
|
485 | 487 |
|
486 | 488 |
|
487 |
| -def searchsorted_integer(arr, value, side="left", sorter=None): |
488 |
| - """ |
489 |
| - searchsorted implementation for searching integer arrays. |
490 |
| -
|
491 |
| - We get a speedup if we ensure the dtype of arr and value are the same |
492 |
| - (if possible) before searching, as numpy implicitly converts the dtypes |
493 |
| - if they're different, which would cause a slowdown. |
494 |
| -
|
495 |
| - See :func:`searchsorted` for a more general searchsorted implementation. |
496 |
| -
|
497 |
| - Parameters |
498 |
| - ---------- |
499 |
| - arr : numpy.array |
500 |
| - a numpy array of integers |
501 |
| - value : int or numpy.array |
502 |
| - an integer or an array of integers that we want to find the |
503 |
| - location(s) for in `arr` |
504 |
| - side : str |
505 |
| - One of {'left', 'right'} |
506 |
| - sorter : numpy.array, optional |
507 |
| -
|
508 |
| - Returns |
509 |
| - ------- |
510 |
| - int or numpy.array |
511 |
| - The location(s) of `value` in `arr`. |
512 |
| - """ |
513 |
| - from .arrays.array_ import array |
514 |
| - if sorter is not None: |
515 |
| - sorter = ensure_platform_int(sorter) |
516 |
| - |
517 |
| - # below we try to give `value` the same dtype as `arr`, while guarding |
518 |
| - # against integer overflows. If the value of `value` is outside of the |
519 |
| - # bound of `arr`, `arr` would be recast by numpy, causing a slower search. |
520 |
| - value_arr = np.array([value]) if is_scalar(value) else np.array(value) |
521 |
| - iinfo = np.iinfo(arr.dtype.type) |
522 |
| - if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all(): |
523 |
| - dtype = arr.dtype |
524 |
| - else: |
525 |
| - dtype = value_arr.dtype |
526 |
| - |
527 |
| - if is_scalar(value): |
528 |
| - value = dtype.type(value) |
529 |
| - else: |
530 |
| - value = array(value, dtype=dtype) |
531 |
| - |
532 |
| - return arr.searchsorted(value, side=side, sorter=sorter) |
533 |
| - |
534 |
| - |
535 | 489 | def searchsorted(arr, value, side="left", sorter=None):
|
536 | 490 | """
|
537 | 491 | Find indices where elements should be inserted to maintain order.
|
538 | 492 |
|
539 |
| - Find the indices into a sorted array-like `arr` such that, if the |
| 493 | + .. versionadded:: 0.25.0 |
| 494 | +
|
| 495 | + Find the indices into a sorted array `arr` (a) such that, if the |
540 | 496 | corresponding elements in `value` were inserted before the indices,
|
541 |
| - the order of `arr` would be preserved. |
| 497 | + the order of `arr` would be preserved. |
| 498 | +
|
| 499 | + Assuming that `arr` is sorted: |
542 | 500 |
|
543 |
| - See :class:`IndexOpsMixin.searchsorted` for more details and examples. |
| 501 | + ====== ================================ |
| 502 | + `side` returned index `i` satisfies |
| 503 | + ====== ================================ |
| 504 | + left ``arr[i-1] < value <= arr[i]`` |
| 505 | + right ``arr[i-1] <= value < arr[i]`` |
| 506 | + ====== ================================ |
544 | 507 |
|
545 | 508 | Parameters
|
546 | 509 | ----------
|
547 |
| - arr : numpy.array or ExtensionArray |
548 |
| - value : scalar or numpy.array |
549 |
| - side : str |
550 |
| - One of {'left', 'right'} |
551 |
| - sorter : numpy.array, optional |
| 510 | + arr : numpy.array or ExtensionArray |
| 511 | + Array to search in. Cannot be Index, Series or PandasArray, as that |
| 512 | + would cause a RecursionError. |
| 513 | + value : array_like |
| 514 | + Values to insert into `arr`. |
| 515 | + side : {'left', 'right'}, optional |
| 516 | + If 'left', the index of the first suitable location found is given. |
| 517 | + If 'right', return the last such index. If there is no suitable |
| 518 | + index, return either 0 or N (where N is the length of `arr`). |
| 519 | + sorter : 1-D array_like, optional |
| 520 | + Optional array of integer indices that sort array a into ascending |
| 521 | + order. They are typically the result of argsort. |
552 | 522 |
|
553 | 523 | Returns
|
554 | 524 | -------
|
555 |
| - int or numpy.array |
556 |
| - The location(s) of `value` in `arr`. |
| 525 | + array of ints |
| 526 | + Array of insertion points with the same shape as `value`. |
| 527 | +
|
| 528 | + See Also |
| 529 | + -------- |
| 530 | + numpy.searchsorted : Similar method from NumPy. |
557 | 531 | """
|
558 | 532 | if sorter is not None:
|
559 | 533 | sorter = ensure_platform_int(sorter)
|
560 | 534 |
|
561 | 535 | if is_integer_dtype(arr) and (
|
562 | 536 | is_integer(value) or is_integer_dtype(value)):
|
563 |
| - return searchsorted_integer(arr, value, side=side, sorter=sorter) |
564 |
| - if not (is_object_dtype(arr) or is_numeric_dtype(arr) or |
565 |
| - is_categorical_dtype(arr)): |
| 537 | + from .arrays.array_ import array |
| 538 | + # if `arr` and `value` have different dtypes, `arr` would be |
| 539 | + # recast by numpy, causing a slow search. |
| 540 | + # Before searching below, we therefore try to give `value` the |
| 541 | + # same dtype as `arr`, while guarding against integer overflows. |
| 542 | + iinfo = np.iinfo(arr.dtype.type) |
| 543 | + value_arr = np.array([value]) if is_scalar(value) else np.array(value) |
| 544 | + if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all(): |
| 545 | + # value within bounds, so no overflow, so can convert value dtype |
| 546 | + # to dtype of arr |
| 547 | + dtype = arr.dtype |
| 548 | + else: |
| 549 | + dtype = value_arr.dtype |
| 550 | + |
| 551 | + if is_scalar(value): |
| 552 | + value = dtype.type(value) |
| 553 | + else: |
| 554 | + value = array(value, dtype=dtype) |
| 555 | + elif not (is_object_dtype(arr) or is_numeric_dtype(arr) or |
| 556 | + is_categorical_dtype(arr)): |
| 557 | + from pandas.core.series import Series |
566 | 558 | # E.g. if `arr` is an array with dtype='datetime64[ns]'
|
567 | 559 | # and `value` is a pd.Timestamp, we may need to convert value
|
568 |
| - from pandas.core.series import Series |
569 |
| - value = Series(value)._values |
570 |
| - return arr.searchsorted(value, side=side, sorter=sorter) |
| 560 | + value_ser = Series(value)._values |
| 561 | + value = value_ser[0] if is_scalar(value) else value_ser |
| 562 | + |
| 563 | + result = arr.searchsorted(value, side=side, sorter=sorter) |
| 564 | + return result |
0 commit comments