Skip to content

Commit 3c4be2b

Browse files
authored
PERF: Series slicing (#52145)
1 parent 4a2ea81 commit 3c4be2b

File tree

2 files changed

+27
-28
lines changed

2 files changed

+27
-28
lines changed

pandas/core/indexes/base.py

Lines changed: 18 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,6 @@
9393
ensure_platform_int,
9494
is_any_real_numeric_dtype,
9595
is_bool_dtype,
96-
is_categorical_dtype,
9796
is_dtype_equal,
9897
is_ea_or_datetimelike_dtype,
9998
is_extension_array_dtype,
@@ -102,7 +101,6 @@
102101
is_hashable,
103102
is_integer,
104103
is_integer_dtype,
105-
is_interval_dtype,
106104
is_iterator,
107105
is_list_like,
108106
is_numeric_dtype,
@@ -3746,7 +3744,7 @@ def get_indexer(
37463744
# matched to Interval scalars
37473745
return self._get_indexer_non_comparable(target, method=method, unique=True)
37483746

3749-
if is_categorical_dtype(self.dtype):
3747+
if isinstance(self.dtype, CategoricalDtype):
37503748
# _maybe_cast_listlike_indexer ensures target has our dtype
37513749
# (could improve perf by doing _should_compare check earlier?)
37523750
assert is_dtype_equal(self.dtype, target.dtype)
@@ -3764,7 +3762,7 @@ def get_indexer(
37643762
indexer[mask & ~target_nans] = -1
37653763
return indexer
37663764

3767-
if is_categorical_dtype(target.dtype):
3765+
if isinstance(target.dtype, CategoricalDtype):
37683766
# potential fastpath
37693767
# get an indexer for unique categories then propagate to codes via take_nd
37703768
# get_indexer instead of _get_indexer needed for MultiIndex cases
@@ -3842,8 +3840,8 @@ def _should_partial_index(self, target: Index) -> bool:
38423840
"""
38433841
Should we attempt partial-matching indexing?
38443842
"""
3845-
if is_interval_dtype(self.dtype):
3846-
if is_interval_dtype(target.dtype):
3843+
if isinstance(self.dtype, IntervalDtype):
3844+
if isinstance(target.dtype, IntervalDtype):
38473845
return False
38483846
# See https://github.com/pandas-dev/pandas/issues/47772 the commented
38493847
# out code can be restored (instead of hardcoding `return True`)
@@ -3880,7 +3878,7 @@ def _check_indexing_method(
38803878
"tolerance not implemented yet for MultiIndex"
38813879
)
38823880

3883-
if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype):
3881+
if isinstance(self.dtype, (IntervalDtype, CategoricalDtype)):
38843882
# GH#37871 for now this is only for IntervalIndex and CategoricalIndex
38853883
if method is not None:
38863884
raise NotImplementedError(
@@ -4082,7 +4080,7 @@ def _convert_slice_indexer(self, key: slice, kind: str_t):
40824080

40834081
# TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
40844082
# to simplify this.
4085-
if isinstance(self.dtype, np.dtype) and is_float_dtype(self.dtype):
4083+
if isinstance(self.dtype, np.dtype) and self.dtype.kind == "f":
40864084
# We always treat __getitem__ slicing as label-based
40874085
# translate to locations
40884086
return self.slice_indexer(start, stop, step)
@@ -4096,14 +4094,14 @@ def is_int(v):
40964094
# special case for interval_dtype bc we do not do partial-indexing
40974095
# on integer Intervals when slicing
40984096
# TODO: write this in terms of e.g. should_partial_index?
4099-
ints_are_positional = self._should_fallback_to_positional or is_interval_dtype(
4100-
self.dtype
4097+
ints_are_positional = self._should_fallback_to_positional or isinstance(
4098+
self.dtype, IntervalDtype
41014099
)
41024100
is_positional = is_index_slice and ints_are_positional
41034101

41044102
if kind == "getitem":
41054103
# called from the getitem slicers, validate that we are in fact integers
4106-
if is_integer_dtype(self.dtype) or is_index_slice:
4104+
if is_index_slice or is_integer_dtype(self.dtype):
41074105
# Note: these checks are redundant if we know is_index_slice
41084106
self._validate_indexer("slice", key.start, "getitem")
41094107
self._validate_indexer("slice", key.stop, "getitem")
@@ -4507,7 +4505,7 @@ def join(
45074505
return self._join_non_unique(other, how=how)
45084506
elif not self.is_unique or not other.is_unique:
45094507
if self.is_monotonic_increasing and other.is_monotonic_increasing:
4510-
if not is_interval_dtype(self.dtype):
4508+
if not isinstance(self.dtype, IntervalDtype):
45114509
# otherwise we will fall through to _join_via_get_indexer
45124510
# GH#39133
45134511
# go through object dtype for ea till engine is supported properly
@@ -4520,7 +4518,7 @@ def join(
45204518
and other.is_monotonic_increasing
45214519
and self._can_use_libjoin
45224520
and not isinstance(self, ABCMultiIndex)
4523-
and not is_categorical_dtype(self.dtype)
4521+
and not isinstance(self.dtype, CategoricalDtype)
45244522
):
45254523
# Categorical is monotonic if data are ordered as categories, but join can
45264524
# not handle this in case of not lexicographically monotonic GH#38502
@@ -4904,7 +4902,7 @@ def _can_use_libjoin(self) -> bool:
49044902
or isinstance(self.values, BaseMaskedArray)
49054903
or isinstance(self._values, ArrowExtensionArray)
49064904
)
4907-
return not is_interval_dtype(self.dtype)
4905+
return not isinstance(self.dtype, IntervalDtype)
49084906

49094907
# --------------------------------------------------------------------
49104908
# Uncategorized Methods
@@ -5230,7 +5228,7 @@ def _can_hold_identifiers_and_holds_name(self, name) -> bool:
52305228
if (
52315229
is_object_dtype(self.dtype)
52325230
or is_string_dtype(self.dtype)
5233-
or is_categorical_dtype(self.dtype)
5231+
or isinstance(self.dtype, CategoricalDtype)
52345232
):
52355233
return name in self
52365234
return False
@@ -5930,11 +5928,11 @@ def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None:
59305928
if nmissing:
59315929
# TODO: remove special-case; this is just to keep exception
59325930
# message tests from raising while debugging
5933-
use_interval_msg = is_interval_dtype(self.dtype) or (
5934-
is_categorical_dtype(self.dtype)
5931+
use_interval_msg = isinstance(self.dtype, IntervalDtype) or (
5932+
isinstance(self.dtype, CategoricalDtype)
59355933
# "Index" has no attribute "categories" [attr-defined]
5936-
and is_interval_dtype(
5937-
self.categories.dtype # type: ignore[attr-defined]
5934+
and isinstance(
5935+
self.categories.dtype, IntervalDtype # type: ignore[attr-defined]
59385936
)
59395937
)
59405938

@@ -6942,8 +6940,7 @@ def _maybe_disable_logical_methods(self, opname: str_t) -> None:
69426940
if (
69436941
isinstance(self, ABCMultiIndex)
69446942
or needs_i8_conversion(self.dtype)
6945-
or is_interval_dtype(self.dtype)
6946-
or is_categorical_dtype(self.dtype)
6943+
or isinstance(self.dtype, (IntervalDtype, CategoricalDtype))
69476944
or is_float_dtype(self.dtype)
69486945
):
69496946
# This call will raise

pandas/core/series.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -981,6 +981,13 @@ def __getitem__(self, key):
981981
# in the first level of our MultiIndex
982982
return self._get_values_tuple(key)
983983

984+
if isinstance(key, slice):
985+
# Do slice check before somewhat-costly is_bool_indexer
986+
# _convert_slice_indexer to determine if this slice is positional
987+
# or label based, and if the latter, convert to positional
988+
slobj = self.index._convert_slice_indexer(key, kind="getitem")
989+
return self._slice(slobj)
990+
984991
if is_iterator(key):
985992
key = list(key)
986993

@@ -993,12 +1000,7 @@ def __getitem__(self, key):
9931000

9941001
def _get_with(self, key):
9951002
# other: fancy integer or otherwise
996-
if isinstance(key, slice):
997-
# _convert_slice_indexer to determine if this slice is positional
998-
# or label based, and if the latter, convert to positional
999-
slobj = self.index._convert_slice_indexer(key, kind="getitem")
1000-
return self._slice(slobj)
1001-
elif isinstance(key, ABCDataFrame):
1003+
if isinstance(key, ABCDataFrame):
10021004
raise TypeError(
10031005
"Indexing a Series with DataFrame is not "
10041006
"supported, use the appropriate DataFrame column"
@@ -1053,7 +1055,7 @@ def _get_values_tuple(self, key: tuple):
10531055

10541056
def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series:
10551057
new_mgr = self._mgr.getitem_mgr(indexer)
1056-
return self._constructor(new_mgr).__finalize__(self)
1058+
return self._constructor(new_mgr, fastpath=True).__finalize__(self)
10571059

10581060
def _get_value(self, label, takeable: bool = False):
10591061
"""

0 commit comments

Comments
 (0)