Skip to content

Commit d91b04c

Browse files
authored
PERF: dtype checks (#52766)
* PERF: dtype checks
* mypy fixup
1 parent c85bbc6 commit d91b04c

File tree

15 files changed

+67
-79
lines changed

15 files changed

+67
-79
lines changed

pandas/_testing/asserters.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
from pandas.core.dtypes.common import (
1818
is_bool,
19-
is_extension_array_dtype,
2019
is_integer_dtype,
2120
is_number,
2221
is_numeric_dtype,
@@ -316,7 +315,7 @@ def _get_ilevel_values(index, level):
316315
if not left.equals(right):
317316
mismatch = left._values != right._values
318317

319-
if is_extension_array_dtype(mismatch):
318+
if not isinstance(mismatch, np.ndarray):
320319
mismatch = cast("ExtensionArray", mismatch).fillna(True)
321320

322321
diff = np.sum(mismatch.astype(int)) * 100.0 / len(left)

pandas/core/algorithms.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
4949
is_integer,
5050
is_integer_dtype,
5151
is_list_like,
52-
is_numeric_dtype,
5352
is_object_dtype,
5453
is_scalar,
5554
is_signed_integer_dtype,
@@ -471,7 +470,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]:
471470

472471
if (
473472
len(values) > 0
474-
and is_numeric_dtype(values.dtype)
473+
and values.dtype.kind in "iufcb"
475474
and not is_signed_integer_dtype(comps)
476475
):
477476
# GH#46485 Use object to avoid upcast to float64 later
@@ -1403,7 +1402,7 @@ def diff(arr, n: int, axis: AxisInt = 0):
14031402
)
14041403

14051404
is_timedelta = False
1406-
if needs_i8_conversion(arr.dtype):
1405+
if arr.dtype.kind in "mM":
14071406
dtype = np.int64
14081407
arr = arr.view("i8")
14091408
na = iNaT
@@ -1413,7 +1412,7 @@ def diff(arr, n: int, axis: AxisInt = 0):
14131412
# We have to cast in order to be able to hold np.nan
14141413
dtype = np.object_
14151414

1416-
elif is_integer_dtype(dtype):
1415+
elif dtype.kind in "iu":
14171416
# We have to cast in order to be able to hold np.nan
14181417

14191418
# int8, int16 are incompatible with float64,

pandas/core/arrays/categorical.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
is_bool_dtype,
4141
is_dict_like,
4242
is_dtype_equal,
43-
is_extension_array_dtype,
4443
is_hashable,
4544
is_integer_dtype,
4645
is_list_like,
@@ -618,7 +617,7 @@ def _from_inferred_categories(
618617

619618
if known_categories:
620619
# Convert to a specialized type with `dtype` if specified.
621-
if is_any_real_numeric_dtype(dtype.categories):
620+
if is_any_real_numeric_dtype(dtype.categories.dtype):
622621
cats = to_numeric(inferred_categories, errors="coerce")
623622
elif lib.is_np_dtype(dtype.categories.dtype, "M"):
624623
cats = to_datetime(inferred_categories, errors="coerce")
@@ -701,7 +700,7 @@ def from_codes(
701700
)
702701
raise ValueError(msg)
703702

704-
if is_extension_array_dtype(codes) and is_integer_dtype(codes):
703+
if isinstance(codes, ExtensionArray) and is_integer_dtype(codes.dtype):
705704
# Avoid the implicit conversion of Int to object
706705
if isna(codes).any():
707706
raise ValueError("codes cannot contain NA values")
@@ -1598,7 +1597,7 @@ def _internal_get_values(self):
15981597
# if we are a datetime and period index, return Index to keep metadata
15991598
if needs_i8_conversion(self.categories.dtype):
16001599
return self.categories.take(self._codes, fill_value=NaT)
1601-
elif is_integer_dtype(self.categories) and -1 in self._codes:
1600+
elif is_integer_dtype(self.categories.dtype) and -1 in self._codes:
16021601
return self.categories.astype("object").take(self._codes, fill_value=np.nan)
16031602
return np.array(self)
16041603

@@ -1809,7 +1808,7 @@ def _values_for_rank(self) -> np.ndarray:
18091808
if mask.any():
18101809
values = values.astype("float64")
18111810
values[mask] = np.nan
1812-
elif is_any_real_numeric_dtype(self.categories):
1811+
elif is_any_real_numeric_dtype(self.categories.dtype):
18131812
values = np.array(self)
18141813
else:
18151814
# reorder the categories (so rank can use the float codes)

pandas/core/arrays/datetimes.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
is_datetime64_any_dtype,
5454
is_dtype_equal,
5555
is_float_dtype,
56-
is_object_dtype,
5756
is_sparse,
5857
is_string_dtype,
5958
pandas_dtype,
@@ -2038,11 +2037,7 @@ def _sequence_to_dt64ns(
20382037
if out_unit is not None:
20392038
out_dtype = np.dtype(f"M8[{out_unit}]")
20402039

2041-
if (
2042-
is_object_dtype(data_dtype)
2043-
or is_string_dtype(data_dtype)
2044-
or is_sparse(data_dtype)
2045-
):
2040+
if data_dtype == object or is_string_dtype(data_dtype) or is_sparse(data_dtype):
20462041
# TODO: We do not have tests specific to string-dtypes,
20472042
# also complex or categorical or other extension
20482043
copy = False

pandas/core/arrays/masked.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,9 @@
4141
from pandas.core.dtypes.base import ExtensionDtype
4242
from pandas.core.dtypes.common import (
4343
is_bool,
44-
is_bool_dtype,
4544
is_dtype_equal,
46-
is_float_dtype,
4745
is_integer_dtype,
4846
is_list_like,
49-
is_object_dtype,
5047
is_scalar,
5148
is_string_dtype,
5249
pandas_dtype,
@@ -408,9 +405,11 @@ def to_numpy(
408405
na_value = libmissing.NA
409406
if dtype is None:
410407
dtype = object
408+
else:
409+
dtype = np.dtype(dtype)
411410
if self._hasna:
412411
if (
413-
not is_object_dtype(dtype)
412+
dtype != object
414413
and not is_string_dtype(dtype)
415414
and na_value is libmissing.NA
416415
):
@@ -545,7 +544,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
545544
else:
546545
inputs2.append(x)
547546

548-
def reconstruct(x):
547+
def reconstruct(x: np.ndarray):
549548
# we don't worry about scalar `x` here, since we
550549
# raise for reduce up above.
551550
from pandas.core.arrays import (
@@ -554,13 +553,13 @@ def reconstruct(x):
554553
IntegerArray,
555554
)
556555

557-
if is_bool_dtype(x.dtype):
556+
if x.dtype.kind == "b":
558557
m = mask.copy()
559558
return BooleanArray(x, m)
560-
elif is_integer_dtype(x.dtype):
559+
elif x.dtype.kind in "iu":
561560
m = mask.copy()
562561
return IntegerArray(x, m)
563-
elif is_float_dtype(x.dtype):
562+
elif x.dtype.kind == "f":
564563
m = mask.copy()
565564
if x.dtype == np.float16:
566565
# reached in e.g. np.sqrt on BooleanArray
@@ -763,7 +762,9 @@ def _cmp_method(self, other, op) -> BooleanArray:
763762
mask = self._propagate_mask(mask, other)
764763
return BooleanArray(result, mask, copy=False)
765764

766-
def _maybe_mask_result(self, result, mask):
765+
def _maybe_mask_result(
766+
self, result: np.ndarray | tuple[np.ndarray, np.ndarray], mask: np.ndarray
767+
):
767768
"""
768769
Parameters
769770
----------
@@ -778,12 +779,12 @@ def _maybe_mask_result(self, result, mask):
778779
self._maybe_mask_result(mod, mask),
779780
)
780781

781-
if is_float_dtype(result.dtype):
782+
if result.dtype.kind == "f":
782783
from pandas.core.arrays import FloatingArray
783784

784785
return FloatingArray(result, mask, copy=False)
785786

786-
elif is_bool_dtype(result.dtype):
787+
elif result.dtype.kind == "b":
787788
from pandas.core.arrays import BooleanArray
788789

789790
return BooleanArray(result, mask, copy=False)
@@ -794,13 +795,14 @@ def _maybe_mask_result(self, result, mask):
794795
# e.g. test_numeric_arr_mul_tdscalar_numexpr_path
795796
from pandas.core.arrays import TimedeltaArray
796797

798+
result[mask] = result.dtype.type("NaT")
799+
797800
if not isinstance(result, TimedeltaArray):
798-
result = TimedeltaArray._simple_new(result, dtype=result.dtype)
801+
return TimedeltaArray._simple_new(result, dtype=result.dtype)
799802

800-
result[mask] = result.dtype.type("NaT")
801803
return result
802804

803-
elif is_integer_dtype(result.dtype):
805+
elif result.dtype.kind in "iu":
804806
from pandas.core.arrays import IntegerArray
805807

806808
return IntegerArray(result, mask, copy=False)
@@ -875,7 +877,7 @@ def isin(self, values) -> BooleanArray: # type: ignore[override]
875877
result = isin(self._data, values_arr)
876878

877879
if self._hasna:
878-
values_have_NA = is_object_dtype(values_arr.dtype) and any(
880+
values_have_NA = values_arr.dtype == object and any(
879881
val is self.dtype.na_value for val in values_arr
880882
)
881883

pandas/core/arrays/timedeltas.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,14 @@
4747
from pandas.core.dtypes.common import (
4848
TD64NS_DTYPE,
4949
is_dtype_equal,
50-
is_extension_array_dtype,
5150
is_float_dtype,
5251
is_integer_dtype,
5352
is_object_dtype,
5453
is_scalar,
5554
is_string_dtype,
56-
is_timedelta64_dtype,
5755
pandas_dtype,
5856
)
57+
from pandas.core.dtypes.dtypes import ExtensionDtype
5958
from pandas.core.dtypes.missing import isna
6059

6160
from pandas.core import (
@@ -137,7 +136,7 @@ class TimedeltaArray(dtl.TimelikeOps):
137136
_typ = "timedeltaarray"
138137
_internal_fill_value = np.timedelta64("NaT", "ns")
139138
_recognized_scalars = (timedelta, np.timedelta64, Tick)
140-
_is_recognized_dtype = is_timedelta64_dtype
139+
_is_recognized_dtype = lambda x: lib.is_np_dtype(x, "m")
141140
_infer_matches = ("timedelta", "timedelta64")
142141

143142
@property
@@ -912,7 +911,7 @@ def sequence_to_td64ns(
912911
inferred_freq = data.freq
913912

914913
# Convert whatever we have into timedelta64[ns] dtype
915-
if is_object_dtype(data.dtype) or is_string_dtype(data.dtype):
914+
if data.dtype == object or is_string_dtype(data.dtype):
916915
# no need to make a copy, need to convert if string-dtyped
917916
data = _objects_to_td64ns(data, unit=unit, errors=errors)
918917
copy = False
@@ -925,7 +924,7 @@ def sequence_to_td64ns(
925924
elif is_float_dtype(data.dtype):
926925
# cast the unit, multiply base/frac separately
927926
# to avoid precision issues from float -> int
928-
if is_extension_array_dtype(data.dtype):
927+
if isinstance(data.dtype, ExtensionDtype):
929928
mask = data._mask
930929
data = data._data
931930
else:

pandas/core/dtypes/cast.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,6 @@
5353
is_extension_array_dtype,
5454
is_float,
5555
is_integer,
56-
is_integer_dtype,
57-
is_numeric_dtype,
5856
is_object_dtype,
5957
is_scalar,
6058
is_string_dtype,
@@ -472,7 +470,7 @@ def maybe_cast_pointwise_result(
472470
else:
473471
result = maybe_cast_to_extension_array(cls, result)
474472

475-
elif (numeric_only and is_numeric_dtype(dtype)) or not numeric_only:
473+
elif (numeric_only and dtype.kind in "iufcb") or not numeric_only:
476474
result = maybe_downcast_to_dtype(result, dtype)
477475

478476
return result
@@ -1041,13 +1039,13 @@ def convert_dtypes(
10411039
if convert_integer:
10421040
target_int_dtype = pandas_dtype_func("Int64")
10431041

1044-
if is_integer_dtype(input_array.dtype):
1042+
if input_array.dtype.kind in "iu":
10451043
from pandas.core.arrays.integer import INT_STR_TO_DTYPE
10461044

10471045
inferred_dtype = INT_STR_TO_DTYPE.get(
10481046
input_array.dtype.name, target_int_dtype
10491047
)
1050-
elif is_numeric_dtype(input_array.dtype):
1048+
elif input_array.dtype.kind in "fcb":
10511049
# TODO: de-dup with maybe_cast_to_integer_array?
10521050
arr = input_array[notna(input_array)]
10531051
if (arr.astype(int) == arr).all():
@@ -1062,9 +1060,8 @@ def convert_dtypes(
10621060
inferred_dtype = target_int_dtype
10631061

10641062
if convert_floating:
1065-
if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
1066-
input_array.dtype
1067-
):
1063+
if input_array.dtype.kind in "fcb":
1064+
# i.e. numeric but not integer
10681065
from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE
10691066

10701067
inferred_float_dtype: DtypeObj = FLOAT_STR_TO_DTYPE.get(

pandas/core/dtypes/common.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def classes(*klasses) -> Callable:
122122
return lambda tipo: issubclass(tipo, klasses)
123123

124124

125-
def classes_and_not_datetimelike(*klasses) -> Callable:
125+
def _classes_and_not_datetimelike(*klasses) -> Callable:
126126
"""
127127
Evaluate if the tipo is a subclass of the klasses
128128
and not a datetimelike.
@@ -654,7 +654,7 @@ def is_integer_dtype(arr_or_dtype) -> bool:
654654
False
655655
"""
656656
return _is_dtype_type(
657-
arr_or_dtype, classes_and_not_datetimelike(np.integer)
657+
arr_or_dtype, _classes_and_not_datetimelike(np.integer)
658658
) or _is_dtype(
659659
arr_or_dtype, lambda typ: isinstance(typ, ExtensionDtype) and typ.kind in "iu"
660660
)
@@ -713,7 +713,7 @@ def is_signed_integer_dtype(arr_or_dtype) -> bool:
713713
False
714714
"""
715715
return _is_dtype_type(
716-
arr_or_dtype, classes_and_not_datetimelike(np.signedinteger)
716+
arr_or_dtype, _classes_and_not_datetimelike(np.signedinteger)
717717
) or _is_dtype(
718718
arr_or_dtype, lambda typ: isinstance(typ, ExtensionDtype) and typ.kind == "i"
719719
)
@@ -763,7 +763,7 @@ def is_unsigned_integer_dtype(arr_or_dtype) -> bool:
763763
True
764764
"""
765765
return _is_dtype_type(
766-
arr_or_dtype, classes_and_not_datetimelike(np.unsignedinteger)
766+
arr_or_dtype, _classes_and_not_datetimelike(np.unsignedinteger)
767767
) or _is_dtype(
768768
arr_or_dtype, lambda typ: isinstance(typ, ExtensionDtype) and typ.kind == "u"
769769
)
@@ -1087,7 +1087,7 @@ def is_numeric_dtype(arr_or_dtype) -> bool:
10871087
False
10881088
"""
10891089
return _is_dtype_type(
1090-
arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_)
1090+
arr_or_dtype, _classes_and_not_datetimelike(np.number, np.bool_)
10911091
) or _is_dtype(
10921092
arr_or_dtype, lambda typ: isinstance(typ, ExtensionDtype) and typ._is_numeric
10931093
)
@@ -1490,7 +1490,7 @@ def infer_dtype_from_object(dtype) -> type:
14901490
except TypeError:
14911491
pass
14921492

1493-
if is_extension_array_dtype(dtype):
1493+
if isinstance(dtype, ExtensionDtype):
14941494
return dtype.type
14951495
elif isinstance(dtype, str):
14961496
# TODO(jreback)
@@ -1644,7 +1644,6 @@ def is_all_strings(value: ArrayLike) -> bool:
16441644

16451645
__all__ = [
16461646
"classes",
1647-
"classes_and_not_datetimelike",
16481647
"DT64NS_DTYPE",
16491648
"ensure_float64",
16501649
"ensure_python_int",

pandas/core/indexes/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6138,7 +6138,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
61386138
elif is_numeric_dtype(self.dtype):
61396139
return is_numeric_dtype(dtype)
61406140
# TODO: this was written assuming we only get here with object-dtype,
6141-
# which is nom longer correct. Can we specialize for EA?
6141+
# which is no longer correct. Can we specialize for EA?
61426142
return True
61436143

61446144
@final

pandas/core/internals/array_manager.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
ensure_platform_int,
3030
is_datetime64_ns_dtype,
3131
is_dtype_equal,
32-
is_extension_array_dtype,
3332
is_integer,
3433
is_numeric_dtype,
3534
is_object_dtype,
@@ -1125,7 +1124,7 @@ def as_array(
11251124
dtype = dtype.subtype
11261125
elif isinstance(dtype, PandasDtype):
11271126
dtype = dtype.numpy_dtype
1128-
elif is_extension_array_dtype(dtype):
1127+
elif isinstance(dtype, ExtensionDtype):
11291128
dtype = "object"
11301129
elif is_dtype_equal(dtype, str):
11311130
dtype = "object"

0 commit comments

Comments
 (0)