Skip to content

Commit bb424ba

Browse files
committed
Merge branch 'main' of https://github.com/pandas-dev/pandas into add_numeric_only_gb
2 parents ebf777a + a2029ce commit bb424ba

File tree

11 files changed

+126
-50
lines changed

11 files changed

+126
-50
lines changed

doc/source/whatsnew/v1.5.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,7 @@ Conversion
499499
- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`)
500500
- Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`)
501501
- Bug when comparing string and datetime64ns objects causing an ``OverflowError`` exception (:issue:`45506`)
502+
- Bug in metaclass of generic abstract dtypes causing :meth:`DataFrame.apply` and :meth:`Series.apply` to raise for the built-in function ``type`` (:issue:`46684`)
502503

503504
Strings
504505
^^^^^^^

pandas/_libs/algos.pxd

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from pandas._libs.dtypes cimport numeric_t
1+
from pandas._libs.dtypes cimport (
2+
numeric_object_t,
3+
numeric_t,
4+
)
25

36

47
cdef numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil
@@ -10,3 +13,10 @@ cdef enum TiebreakEnumType:
1013
TIEBREAK_FIRST
1114
TIEBREAK_FIRST_DESCENDING
1215
TIEBREAK_DENSE
16+
17+
18+
cdef numeric_object_t get_rank_nan_fill_val(
19+
bint rank_nans_highest,
20+
numeric_object_t val,
21+
bint is_datetimelike=*,
22+
)

pandas/_libs/algos.pyx

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -822,13 +822,17 @@ def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike):
822822

823823
cdef numeric_object_t get_rank_nan_fill_val(
824824
bint rank_nans_highest,
825-
numeric_object_t[:] _=None
825+
numeric_object_t val,
826+
bint is_datetimelike=False,
826827
):
827828
"""
828829
Return the value we'll use to represent missing values when sorting depending
829830
on if we'd like missing values to end up at the top/bottom. (The second parameter
830831
is unused, but needed for fused type specialization)
831832
"""
833+
if numeric_object_t is int64_t and is_datetimelike and not rank_nans_highest:
834+
return NPY_NAT + 1
835+
832836
if rank_nans_highest:
833837
if numeric_object_t is object:
834838
return Infinity()
@@ -854,6 +858,9 @@ cdef numeric_object_t get_rank_nan_fill_val(
854858
if numeric_object_t is object:
855859
return NegInfinity()
856860
elif numeric_object_t is int64_t:
861+
# Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN
862+
# instead of NPY_NAT here causes build warnings and failure in
863+
# test_cummax_i8_at_implementation_bound
857864
return NPY_NAT
858865
elif numeric_object_t is int32_t:
859866
return util.INT32_MIN
@@ -975,7 +982,7 @@ def rank_1d(
975982
# will flip the ordering to still end up with lowest rank.
976983
# Symmetric logic applies to `na_option == 'bottom'`
977984
nans_rank_highest = ascending ^ (na_option == 'top')
978-
nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest)
985+
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)
979986
if nans_rank_highest:
980987
order = [masked_vals, mask]
981988
else:
@@ -1335,7 +1342,7 @@ def rank_2d(
13351342

13361343
nans_rank_highest = ascending ^ (na_option == 'top')
13371344
if check_mask:
1338-
nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest)
1345+
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)
13391346

13401347
if numeric_object_t is object:
13411348
mask = missing.isnaobj2d(values).view(np.uint8)

pandas/_libs/groupby.pyx

Lines changed: 12 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ from numpy.math cimport NAN
3131
cnp.import_array()
3232

3333
from pandas._libs cimport util
34-
from pandas._libs.algos cimport kth_smallest_c
34+
from pandas._libs.algos cimport (
35+
get_rank_nan_fill_val,
36+
kth_smallest_c,
37+
)
3538

3639
from pandas._libs.algos import (
3740
ensure_platform_int,
@@ -989,36 +992,16 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil:
989992
return False
990993

991994

992-
cdef numeric_t _get_min_or_max(numeric_t val, bint compute_max, bint is_datetimelike):
995+
cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, bint is_datetimelike):
993996
"""
994-
Find either the min or the max supported by numeric_t; 'val' is a placeholder
995-
to effectively make numeric_t an argument.
997+
Find either the min or the max supported by numeric_object_t; 'val' is a
998+
placeholder to effectively make numeric_object_t an argument.
996999
"""
997-
if numeric_t is int64_t:
998-
if compute_max and is_datetimelike:
999-
return -_int64_max
1000-
# Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN
1001-
# instead of NPY_NAT here causes build warnings and failure in
1002-
# test_cummax_i8_at_implementation_bound
1003-
return NPY_NAT if compute_max else util.INT64_MAX
1004-
elif numeric_t is int32_t:
1005-
return util.INT32_MIN if compute_max else util.INT32_MAX
1006-
elif numeric_t is int16_t:
1007-
return util.INT16_MIN if compute_max else util.INT16_MAX
1008-
elif numeric_t is int8_t:
1009-
return util.INT8_MIN if compute_max else util.INT8_MAX
1010-
1011-
elif numeric_t is uint64_t:
1012-
return 0 if compute_max else util.UINT64_MAX
1013-
elif numeric_t is uint32_t:
1014-
return 0 if compute_max else util.UINT32_MAX
1015-
elif numeric_t is uint16_t:
1016-
return 0 if compute_max else util.UINT16_MAX
1017-
elif numeric_t is uint8_t:
1018-
return 0 if compute_max else util.UINT8_MAX
1019-
1020-
else:
1021-
return -np.inf if compute_max else np.inf
1000+
return get_rank_nan_fill_val(
1001+
not compute_max,
1002+
val=val,
1003+
is_datetimelike=is_datetimelike,
1004+
)
10221005

10231006

10241007
cdef numeric_t _get_na_val(numeric_t val, bint is_datetimelike):

pandas/_libs/tslibs/conversion.pyx

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import cython
22
import numpy as np
33

44
cimport numpy as cnp
5+
from cpython.object cimport PyObject
56
from numpy cimport (
67
int32_t,
78
int64_t,
@@ -273,7 +274,8 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool = True):
273274

274275
@cython.boundscheck(False)
275276
@cython.wraparound(False)
276-
def datetime_to_datetime64(ndarray[object] values):
277+
def datetime_to_datetime64(ndarray values):
278+
# ndarray[object], but can't declare object without ndim
277279
"""
278280
Convert ndarray of datetime-like objects to int64 array representing
279281
nanosecond timestamps.
@@ -288,20 +290,27 @@ def datetime_to_datetime64(ndarray[object] values):
288290
inferred_tz : tzinfo or None
289291
"""
290292
cdef:
291-
Py_ssize_t i, n = len(values)
293+
Py_ssize_t i, n = values.size
292294
object val
293-
int64_t[:] iresult
295+
int64_t ival
296+
ndarray iresult # int64_t, but can't declare that without specifying ndim
294297
npy_datetimestruct dts
295298
_TSObject _ts
296299
bint found_naive = False
297300
tzinfo inferred_tz = None
298301

299-
result = np.empty(n, dtype='M8[ns]')
302+
cnp.broadcast mi
303+
304+
result = np.empty((<object>values).shape, dtype='M8[ns]')
300305
iresult = result.view('i8')
306+
307+
mi = cnp.PyArray_MultiIterNew2(iresult, values)
301308
for i in range(n):
302-
val = values[i]
309+
# Analogous to: val = values[i]
310+
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
311+
303312
if checknull_with_nat(val):
304-
iresult[i] = NPY_NAT
313+
ival = NPY_NAT
305314
elif PyDateTime_Check(val):
306315
if val.tzinfo is not None:
307316
if found_naive:
@@ -314,18 +323,23 @@ def datetime_to_datetime64(ndarray[object] values):
314323
inferred_tz = val.tzinfo
315324

316325
_ts = convert_datetime_to_tsobject(val, None)
317-
iresult[i] = _ts.value
326+
ival = _ts.value
318327
check_dts_bounds(&_ts.dts)
319328
else:
320329
found_naive = True
321330
if inferred_tz is not None:
322331
raise ValueError('Cannot mix tz-aware with '
323332
'tz-naive values')
324-
iresult[i] = pydatetime_to_dt64(val, &dts)
333+
ival = pydatetime_to_dt64(val, &dts)
325334
check_dts_bounds(&dts)
326335
else:
327336
raise TypeError(f'Unrecognized value type: {type(val)}')
328337

338+
# Analogous to: iresult[i] = ival
339+
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
340+
341+
cnp.PyArray_MultiIter_NEXT(mi)
342+
329343
return result, inferred_tz
330344

331345

pandas/core/arrays/datetimes.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2247,10 +2247,9 @@ def objects_to_datetime64ns(
22472247
result = result.reshape(data.shape, order=order)
22482248
except ValueError as err:
22492249
try:
2250-
values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))
2250+
values, tz_parsed = conversion.datetime_to_datetime64(data)
22512251
# If tzaware, these values represent unix timestamps, so we
22522252
# return them as i8 to distinguish from wall times
2253-
values = values.reshape(data.shape, order=order)
22542253
return values.view("i8"), tz_parsed
22552254
except (ValueError, TypeError):
22562255
raise err

pandas/core/arrays/timedeltas.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -429,14 +429,15 @@ def _formatter(self, boxed: bool = False):
429429

430430
return get_format_timedelta64(self, box=True)
431431

432-
@dtl.ravel_compat
433432
def _format_native_types(
434433
self, *, na_rep="NaT", date_format=None, **kwargs
435434
) -> npt.NDArray[np.object_]:
436435
from pandas.io.formats.format import get_format_timedelta64
437436

438437
formatter = get_format_timedelta64(self._ndarray, na_rep)
439-
return np.array([formatter(x) for x in self._ndarray])
438+
# equiv: np.array([formatter(x) for x in self._ndarray])
439+
# but independent of dimension
440+
return np.frompyfunc(formatter, 1, 1)(self._ndarray)
440441

441442
# ----------------------------------------------------------------
442443
# Arithmetic Methods

pandas/core/dtypes/generic.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,25 @@
3737
# define abstract base classes to enable isinstance type checking on our
3838
# objects
3939
def create_pandas_abc_type(name, attr, comp):
40+
def _check(inst):
41+
return getattr(inst, attr, "_typ") in comp
4042

4143
# https://github.com/python/mypy/issues/1006
4244
# error: 'classmethod' used with a non-method
4345
@classmethod # type: ignore[misc]
44-
def _check(cls, inst) -> bool:
45-
return getattr(inst, attr, "_typ") in comp
46+
def _instancecheck(cls, inst) -> bool:
47+
return _check(inst) and not isinstance(inst, type)
48+
49+
@classmethod # type: ignore[misc]
50+
def _subclasscheck(cls, inst) -> bool:
51+
# Raise instead of returning False
52+
# This is consistent with default __subclasscheck__ behavior
53+
if not isinstance(inst, type):
54+
raise TypeError("issubclass() arg 1 must be a class")
55+
56+
return _check(inst)
4657

47-
dct = {"__instancecheck__": _check, "__subclasscheck__": _check}
58+
dct = {"__instancecheck__": _instancecheck, "__subclasscheck__": _subclasscheck}
4859
meta = type("ABCBase", (type,), dct)
4960
return meta(name, (), dct)
5061

pandas/tests/apply/test_frame_apply.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1551,3 +1551,29 @@ def foo(x):
15511551
df = DataFrame({"a": [1, 2, 3]})
15521552
with tm.assert_produces_warning(UserWarning, match="Hello, World!"):
15531553
df.agg([foo])
1554+
1555+
1556+
def test_apply_type():
1557+
# GH 46719
1558+
df = DataFrame(
1559+
{"col1": [3, "string", float], "col2": [0.25, datetime(2020, 1, 1), np.nan]},
1560+
index=["a", "b", "c"],
1561+
)
1562+
1563+
# applymap
1564+
result = df.applymap(type)
1565+
expected = DataFrame(
1566+
{"col1": [int, str, type], "col2": [float, datetime, float]},
1567+
index=["a", "b", "c"],
1568+
)
1569+
tm.assert_frame_equal(result, expected)
1570+
1571+
# axis=0
1572+
result = df.apply(type, axis=0)
1573+
expected = Series({"col1": Series, "col2": Series})
1574+
tm.assert_series_equal(result, expected)
1575+
1576+
# axis=1
1577+
result = df.apply(type, axis=1)
1578+
expected = Series({"a": Series, "b": Series, "c": Series})
1579+
tm.assert_series_equal(result, expected)

pandas/tests/apply/test_series_apply.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,3 +889,11 @@ def test_apply_retains_column_name():
889889
index=Index(range(3), name="x"),
890890
)
891891
tm.assert_frame_equal(result, expected)
892+
893+
894+
def test_apply_type():
895+
# GH 46719
896+
s = Series([3, "string", float], index=["a", "b", "c"])
897+
result = s.apply(type)
898+
expected = Series([int, str, type], index=["a", "b", "c"])
899+
tm.assert_series_equal(result, expected)

pandas/tests/dtypes/test_generic.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
from warnings import catch_warnings
23

34
import numpy as np
@@ -49,13 +50,28 @@ class TestABCClasses:
4950

5051
@pytest.mark.parametrize("abctype1, inst", abc_pairs)
5152
@pytest.mark.parametrize("abctype2, _", abc_pairs)
52-
def test_abc_pairs(self, abctype1, abctype2, inst, _):
53-
# GH 38588
53+
def test_abc_pairs_instance_check(self, abctype1, abctype2, inst, _):
54+
# GH 38588, 46719
5455
if abctype1 == abctype2:
5556
assert isinstance(inst, getattr(gt, abctype2))
57+
assert not isinstance(type(inst), getattr(gt, abctype2))
5658
else:
5759
assert not isinstance(inst, getattr(gt, abctype2))
5860

61+
@pytest.mark.parametrize("abctype1, inst", abc_pairs)
62+
@pytest.mark.parametrize("abctype2, _", abc_pairs)
63+
def test_abc_pairs_subclass_check(self, abctype1, abctype2, inst, _):
64+
# GH 38588, 46719
65+
if abctype1 == abctype2:
66+
assert issubclass(type(inst), getattr(gt, abctype2))
67+
68+
with pytest.raises(
69+
TypeError, match=re.escape("issubclass() arg 1 must be a class")
70+
):
71+
issubclass(inst, getattr(gt, abctype2))
72+
else:
73+
assert not issubclass(type(inst), getattr(gt, abctype2))
74+
5975
abc_subclasses = {
6076
"ABCIndex": [
6177
abctype

0 commit comments

Comments
 (0)