Skip to content

Commit 34b3e7f

Browse files
authored
PERF: dtype checks (#52682)
* PERF: faster dtype checks * PERF: dtype checks
1 parent 4017e9c commit 34b3e7f

33 files changed

+109
-155
lines changed

pandas/_testing/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929

3030
from pandas.core.dtypes.common import (
3131
is_float_dtype,
32-
is_integer_dtype,
3332
is_sequence,
3433
is_signed_integer_dtype,
3534
is_unsigned_integer_dtype,
@@ -389,11 +388,11 @@ def makeNumericIndex(k: int = 10, *, name=None, dtype: Dtype | None) -> Index:
389388
dtype = pandas_dtype(dtype)
390389
assert isinstance(dtype, np.dtype)
391390

392-
if is_integer_dtype(dtype):
391+
if dtype.kind in "iu":
393392
values = np.arange(k, dtype=dtype)
394393
if is_unsigned_integer_dtype(dtype):
395394
values += 2 ** (dtype.itemsize * 8 - 1)
396-
elif is_float_dtype(dtype):
395+
elif dtype.kind == "f":
397396
values = np.random.random_sample(k) - np.random.random_sample(1)
398397
values.sort()
399398
values = values * (10 ** np.random.randint(0, 9))

pandas/core/algorithms.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]:
510510
if (
511511
len(comps_array) > 1_000_000
512512
and len(values) <= 26
513-
and not is_object_dtype(comps_array)
513+
and comps_array.dtype != object
514514
):
515515
# If the values include nan we need to check for nan explicitly
516516
# since np.nan is not equal to np.nan
@@ -766,7 +766,7 @@ def factorize(
766766
else:
767767
values = np.asarray(values) # convert DTA/TDA/MultiIndex
768768

769-
if not use_na_sentinel and is_object_dtype(values):
769+
if not use_na_sentinel and values.dtype == object:
770770
# factorize can now handle differentiating various types of null values.
771771
# These can only occur when the array has object dtype.
772772
# However, for backwards compatibility we only use the null for the
@@ -1317,7 +1317,7 @@ def searchsorted(
13171317

13181318
if (
13191319
isinstance(arr, np.ndarray)
1320-
and is_integer_dtype(arr.dtype)
1320+
and arr.dtype.kind in "iu"
13211321
and (is_integer(value) or is_integer_dtype(value))
13221322
):
13231323
# if `arr` and `value` have different dtypes, `arr` would be

pandas/core/array_algos/masked_accumulations.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,6 @@
1212

1313
import numpy as np
1414

15-
from pandas.core.dtypes.common import (
16-
is_bool_dtype,
17-
is_float_dtype,
18-
is_integer_dtype,
19-
)
20-
2115
if TYPE_CHECKING:
2216
from pandas._typing import npt
2317

@@ -46,11 +40,11 @@ def _cum_func(
4640
Whether to skip NA.
4741
"""
4842
dtype_info: np.iinfo | np.finfo
49-
if is_float_dtype(values):
43+
if values.dtype.kind == "f":
5044
dtype_info = np.finfo(values.dtype.type)
51-
elif is_integer_dtype(values):
45+
elif values.dtype.kind in "iu":
5246
dtype_info = np.iinfo(values.dtype.type)
53-
elif is_bool_dtype(values):
47+
elif values.dtype.kind == "b":
5448
# Max value of bool is 1, but since we are setting into a boolean
5549
# array, 255 is fine as well. Min value has to be 0 when setting
5650
# into the boolean array.

pandas/core/arrays/arrow/array.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
is_array_like,
4444
is_bool_dtype,
4545
is_integer,
46-
is_integer_dtype,
4746
is_list_like,
4847
is_object_dtype,
4948
is_scalar,
@@ -363,9 +362,9 @@ def __getitem__(self, item: PositionalIndexer):
363362
else:
364363
pa_dtype = self._dtype.pyarrow_dtype
365364
return type(self)(pa.chunked_array([], type=pa_dtype))
366-
elif is_integer_dtype(item.dtype):
365+
elif item.dtype.kind in "iu":
367366
return self.take(item)
368-
elif is_bool_dtype(item.dtype):
367+
elif item.dtype.kind == "b":
369368
return type(self)(self._pa_array.filter(item))
370369
else:
371370
raise IndexError(

pandas/core/arrays/categorical.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
533533
elif isinstance(dtype, ExtensionDtype):
534534
return super().astype(dtype, copy=copy)
535535

536-
elif is_integer_dtype(dtype) and self.isna().any():
536+
elif dtype.kind in "iu" and self.isna().any():
537537
raise ValueError("Cannot convert float NaN to integer")
538538

539539
elif len(self.codes) == 0 or len(self.categories) == 0:
@@ -624,7 +624,7 @@ def _from_inferred_categories(
624624
cats = to_datetime(inferred_categories, errors="coerce")
625625
elif lib.is_np_dtype(dtype.categories.dtype, "m"):
626626
cats = to_timedelta(inferred_categories, errors="coerce")
627-
elif is_bool_dtype(dtype.categories):
627+
elif is_bool_dtype(dtype.categories.dtype):
628628
if true_values is None:
629629
true_values = ["True", "TRUE", "true"]
630630

@@ -708,7 +708,7 @@ def from_codes(
708708
codes = codes.to_numpy(dtype=np.int64)
709709
else:
710710
codes = np.asarray(codes)
711-
if len(codes) and not is_integer_dtype(codes):
711+
if len(codes) and codes.dtype.kind not in "iu":
712712
raise ValueError("codes need to be array-like integers")
713713

714714
if len(codes) and (codes.max() >= len(dtype.categories) or codes.min() < -1):

pandas/core/arrays/datetimelike.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@
8585
is_all_strings,
8686
is_datetime64_any_dtype,
8787
is_dtype_equal,
88-
is_float_dtype,
8988
is_integer_dtype,
9089
is_list_like,
9190
is_object_dtype,
@@ -460,7 +459,7 @@ def astype(self, dtype, copy: bool = True):
460459
return super().astype(dtype, copy=copy)
461460
elif is_string_dtype(dtype):
462461
return self._format_native_types()
463-
elif is_integer_dtype(dtype):
462+
elif dtype.kind in "iu":
464463
# we deliberately ignore int32 vs. int64 here.
465464
# See https://github.com/pandas-dev/pandas/issues/24381 for more.
466465
values = self.asi8
@@ -473,7 +472,7 @@ def astype(self, dtype, copy: bool = True):
473472
if copy:
474473
values = values.copy()
475474
return values
476-
elif (dtype.kind in "mM" and self.dtype != dtype) or is_float_dtype(dtype):
475+
elif (dtype.kind in "mM" and self.dtype != dtype) or dtype.kind == "f":
477476
# disallow conversion between datetime/timedelta,
478477
# and conversions for any datetimelike to float
479478
msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"

pandas/core/arrays/datetimes.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@
5151
INT64_DTYPE,
5252
is_bool_dtype,
5353
is_datetime64_any_dtype,
54-
is_datetime64_dtype,
5554
is_dtype_equal,
5655
is_float_dtype,
5756
is_object_dtype,
@@ -2190,11 +2189,11 @@ def objects_to_datetime64ns(
21902189
# is in UTC
21912190
# Return i8 values to denote unix timestamps
21922191
return result.view("i8"), tz_parsed
2193-
elif is_datetime64_dtype(result):
2192+
elif result.dtype.kind == "M":
21942193
# returning M8[ns] denotes wall-times; since tz is None
21952194
# the distinction is a thin one
21962195
return result, tz_parsed
2197-
elif is_object_dtype(result):
2196+
elif result.dtype == object:
21982197
# GH#23675 when called via `pd.to_datetime`, returning an object-dtype
21992198
# array is allowed. When called via `pd.DatetimeIndex`, we can
22002199
# only accept datetime64 dtype, so raise TypeError if object-dtype

pandas/core/arrays/interval.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -329,9 +329,9 @@ def _ensure_simple_new_inputs(
329329
raise ValueError("closed keyword does not match dtype.closed")
330330

331331
# coerce dtypes to match if needed
332-
if is_float_dtype(left) and is_integer_dtype(right):
332+
if is_float_dtype(left.dtype) and is_integer_dtype(right.dtype):
333333
right = right.astype(left.dtype)
334-
elif is_float_dtype(right) and is_integer_dtype(left):
334+
elif is_float_dtype(right.dtype) and is_integer_dtype(left.dtype):
335335
left = left.astype(right.dtype)
336336

337337
if type(left) != type(right):
@@ -1778,6 +1778,6 @@ def _maybe_convert_platform_interval(values) -> ArrayLike:
17781778

17791779
if not hasattr(values, "dtype"):
17801780
values = np.asarray(values)
1781-
if is_integer_dtype(values) and values.dtype != np.int64:
1781+
if values.dtype.kind in "iu" and values.dtype != np.int64:
17821782
values = values.astype(np.int64)
17831783
return values

pandas/core/arrays/masked.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
from pandas.core.dtypes.common import (
4343
is_bool,
4444
is_bool_dtype,
45-
is_datetime64_dtype,
4645
is_dtype_equal,
4746
is_float_dtype,
4847
is_integer_dtype,
@@ -478,18 +477,18 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
478477
na_value: float | np.datetime64 | lib.NoDefault
479478

480479
# coerce
481-
if is_float_dtype(dtype):
480+
if dtype.kind == "f":
482481
# In astype, we consider dtype=float to also mean na_value=np.nan
483482
na_value = np.nan
484-
elif is_datetime64_dtype(dtype):
483+
elif dtype.kind == "M":
485484
na_value = np.datetime64("NaT")
486485
else:
487486
na_value = lib.no_default
488487

489488
# to_numpy will also raise, but we get somewhat nicer exception messages here
490-
if is_integer_dtype(dtype) and self._hasna:
489+
if dtype.kind in "iu" and self._hasna:
491490
raise ValueError("cannot convert NA to integer")
492-
if is_bool_dtype(dtype) and self._hasna:
491+
if dtype.kind == "b" and self._hasna:
493492
# careful: astype_nansafe converts np.nan to True
494493
raise ValueError("cannot convert float NaN to bool")
495494

@@ -789,10 +788,8 @@ def _maybe_mask_result(self, result, mask):
789788

790789
return BooleanArray(result, mask, copy=False)
791790

792-
elif (
793-
isinstance(result.dtype, np.dtype)
794-
and result.dtype.kind == "m"
795-
and is_supported_unit(get_unit_from_dtype(result.dtype))
791+
elif lib.is_np_dtype(result.dtype, "m") and is_supported_unit(
792+
get_unit_from_dtype(result.dtype)
796793
):
797794
# e.g. test_numeric_arr_mul_tdscalar_numexpr_path
798795
from pandas.core.arrays import TimedeltaArray

pandas/core/arrays/numeric.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,7 @@
1818
from pandas.util._decorators import cache_readonly
1919

2020
from pandas.core.dtypes.common import (
21-
is_bool_dtype,
22-
is_float_dtype,
2321
is_integer_dtype,
24-
is_object_dtype,
2522
is_string_dtype,
2623
pandas_dtype,
2724
)
@@ -171,24 +168,24 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
171168
original = values
172169
values = np.array(values, copy=copy)
173170
inferred_type = None
174-
if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
171+
if values.dtype == object or is_string_dtype(values.dtype):
175172
inferred_type = lib.infer_dtype(values, skipna=True)
176173
if inferred_type == "boolean" and dtype is None:
177174
name = dtype_cls.__name__.strip("_")
178175
raise TypeError(f"{values.dtype} cannot be converted to {name}")
179176

180-
elif is_bool_dtype(values) and checker(dtype):
177+
elif values.dtype.kind == "b" and checker(dtype):
181178
values = np.array(values, dtype=default_dtype, copy=copy)
182179

183-
elif not (is_integer_dtype(values) or is_float_dtype(values)):
180+
elif values.dtype.kind not in "iuf":
184181
name = dtype_cls.__name__.strip("_")
185182
raise TypeError(f"{values.dtype} cannot be converted to {name}")
186183

187184
if values.ndim != 1:
188185
raise TypeError("values must be a 1D list-like")
189186

190187
if mask is None:
191-
if is_integer_dtype(values):
188+
if values.dtype.kind in "iu":
192189
# fastpath
193190
mask = np.zeros(len(values), dtype=np.bool_)
194191
else:
@@ -205,7 +202,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
205202
else:
206203
dtype = dtype.type
207204

208-
if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0:
205+
if is_integer_dtype(dtype) and values.dtype.kind == "f" and len(values) > 0:
209206
if mask.all():
210207
values = np.ones(values.shape, dtype=dtype)
211208
else:

pandas/core/arrays/period.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,7 @@
5656
from pandas.core.dtypes.common import (
5757
ensure_object,
5858
is_datetime64_any_dtype,
59-
is_datetime64_dtype,
6059
is_dtype_equal,
61-
is_float_dtype,
62-
is_integer_dtype,
6360
is_period_dtype,
6461
pandas_dtype,
6562
)
@@ -915,7 +912,7 @@ def period_array(
915912
"""
916913
data_dtype = getattr(data, "dtype", None)
917914

918-
if is_datetime64_dtype(data_dtype):
915+
if lib.is_np_dtype(data_dtype, "M"):
919916
return PeriodArray._from_datetime64(data, freq)
920917
if isinstance(data_dtype, PeriodDtype):
921918
out = PeriodArray(data)
@@ -937,10 +934,10 @@ def period_array(
937934
else:
938935
dtype = None
939936

940-
if is_float_dtype(arrdata) and len(arrdata) > 0:
937+
if arrdata.dtype.kind == "f" and len(arrdata) > 0:
941938
raise TypeError("PeriodIndex does not allow floating point in construction")
942939

943-
if is_integer_dtype(arrdata.dtype):
940+
if arrdata.dtype.kind in "iu":
944941
arr = arrdata.astype(np.int64, copy=False)
945942
# error: Argument 2 to "from_ordinals" has incompatible type "Union[str,
946943
# Tick, None]"; expected "Union[timedelta, BaseOffset, str]"

pandas/core/arrays/timedeltas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -925,7 +925,7 @@ def sequence_to_td64ns(
925925
elif is_float_dtype(data.dtype):
926926
# cast the unit, multiply base/frac separately
927927
# to avoid precision issues from float -> int
928-
if is_extension_array_dtype(data):
928+
if is_extension_array_dtype(data.dtype):
929929
mask = data._mask
930930
data = data._data
931931
else:

pandas/core/construction.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
is_datetime64_ns_dtype,
4646
is_dtype_equal,
4747
is_extension_array_dtype,
48-
is_integer_dtype,
4948
is_list_like,
5049
is_object_dtype,
5150
is_timedelta64_ns_dtype,
@@ -749,7 +748,7 @@ def _try_cast(
749748
"""
750749
is_ndarray = isinstance(arr, np.ndarray)
751750

752-
if is_object_dtype(dtype):
751+
if dtype == object:
753752
if not is_ndarray:
754753
subarr = construct_1d_object_array_from_listlike(arr)
755754
return subarr
@@ -773,7 +772,7 @@ def _try_cast(
773772

774773
# GH#15832: Check if we are requesting a numeric dtype and
775774
# that we can convert the data to the requested dtype.
776-
elif is_integer_dtype(dtype):
775+
elif dtype.kind in "iu":
777776
# this will raise if we have e.g. floats
778777

779778
subarr = maybe_cast_to_integer_array(arr, dtype)

pandas/core/dtypes/astype.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
from pandas.core.dtypes.common import (
2121
is_dtype_equal,
22-
is_integer_dtype,
2322
is_object_dtype,
2423
is_string_dtype,
2524
pandas_dtype,
@@ -99,10 +98,10 @@ def _astype_nansafe(
9998
arr, skipna=skipna, convert_na_value=False
10099
).reshape(shape)
101100

102-
elif np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype):
101+
elif np.issubdtype(arr.dtype, np.floating) and dtype.kind in "iu":
103102
return _astype_float_to_int_nansafe(arr, dtype, copy)
104103

105-
elif is_object_dtype(arr.dtype):
104+
elif arr.dtype == object:
106105
# if we have a datetime/timedelta array of objects
107106
# then coerce to datetime64[ns] and use DatetimeArray.astype
108107

@@ -131,7 +130,7 @@ def _astype_nansafe(
131130
)
132131
raise ValueError(msg)
133132

134-
if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype):
133+
if copy or arr.dtype == object or dtype == object:
135134
# Explicit copy, or required since NumPy can't view from / to object.
136135
return arr.astype(dtype, copy=True)
137136

0 commit comments

Comments
 (0)