Skip to content

Commit 7d36014

Browse files
committed
Merge branch 'master' into rename_DataFrame.applymap_to_DataFrame.map
2 parents 84e0448 + 7974ad0 commit 7d36014

File tree

6 files changed

+80
-3
lines changed

6 files changed

+80
-3
lines changed

doc/source/whatsnew/v2.0.1.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ Fixed regressions
2020

2121
Bug fixes
2222
~~~~~~~~~
23-
-
23+
- Fixed bug in :func:`merge` when merging with ``ArrowDtype`` on one side and a NumPy dtype on the other side (:issue:`52406`)
24+
- Fixed bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type for numeric data (:issue:`52427`)
25+
- Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`)
2426

2527
.. ---------------------------------------------------------------------------
2628
.. _whatsnew_201.other:

pandas/core/arrays/arrow/array.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,6 +1044,11 @@ def to_numpy(
10441044
result = np.empty(len(self), dtype=object)
10451045
mask = ~self.isna()
10461046
result[mask] = np.asarray(self[mask]._pa_array)
1047+
elif pa.types.is_null(self._pa_array.type):
1048+
result = np.asarray(self._pa_array, dtype=dtype)
1049+
if not isna(na_value):
1050+
result[:] = na_value
1051+
return result
10471052
elif self._hasna:
10481053
data = self.copy()
10491054
data[self.isna()] = na_value

pandas/core/methods/describe.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
is_timedelta64_dtype,
3737
)
3838

39+
from pandas.core.arrays.arrow.dtype import ArrowDtype
3940
from pandas.core.arrays.floating import Float64Dtype
4041
from pandas.core.reshape.concat import concat
4142

@@ -229,7 +230,12 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
229230
# GH#48340 - always return float on non-complex numeric data
230231
dtype: DtypeObj | None
231232
if is_extension_array_dtype(series.dtype):
232-
dtype = Float64Dtype()
233+
if isinstance(series.dtype, ArrowDtype):
234+
import pyarrow as pa
235+
236+
dtype = ArrowDtype(pa.float64())
237+
else:
238+
dtype = Float64Dtype()
233239
elif is_numeric_dtype(series.dtype) and not is_complex_dtype(series.dtype):
234240
dtype = np.dtype("float")
235241
else:

pandas/core/reshape/merge.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,13 +80,15 @@
8080
)
8181

8282
from pandas import (
83+
ArrowDtype,
8384
Categorical,
8485
Index,
8586
MultiIndex,
8687
Series,
8788
)
8889
import pandas.core.algorithms as algos
8990
from pandas.core.arrays import (
91+
ArrowExtensionArray,
9092
BaseMaskedArray,
9193
ExtensionArray,
9294
)
@@ -2377,7 +2379,11 @@ def _factorize_keys(
23772379
rk = ensure_int64(rk.codes)
23782380

23792381
elif isinstance(lk, ExtensionArray) and is_dtype_equal(lk.dtype, rk.dtype):
2380-
if not isinstance(lk, BaseMaskedArray):
2382+
if not isinstance(lk, BaseMaskedArray) and not (
2383+
# exclude arrow dtypes that would get cast to object
2384+
isinstance(lk.dtype, ArrowDtype)
2385+
and is_numeric_dtype(lk.dtype.numpy_dtype)
2386+
):
23812387
lk, _ = lk._values_for_factorize()
23822388

23832389
# error: Item "ndarray" of "Union[Any, ndarray]" has no attribute
@@ -2392,6 +2398,16 @@ def _factorize_keys(
23922398
assert isinstance(rk, BaseMaskedArray)
23932399
llab = rizer.factorize(lk._data, mask=lk._mask)
23942400
rlab = rizer.factorize(rk._data, mask=rk._mask)
2401+
elif isinstance(lk, ArrowExtensionArray):
2402+
assert isinstance(rk, ArrowExtensionArray)
2403+
# we can only get here with numeric dtypes
2404+
# TODO: Remove when we have a Factorizer for Arrow
2405+
llab = rizer.factorize(
2406+
lk.to_numpy(na_value=1, dtype=lk.dtype.numpy_dtype), mask=lk.isna()
2407+
)
2408+
rlab = rizer.factorize(
2409+
rk.to_numpy(na_value=1, dtype=lk.dtype.numpy_dtype), mask=rk.isna()
2410+
)
23952411
else:
23962412
# Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type
23972413
# "Union[ndarray[Any, dtype[signedinteger[_64Bit]]],
@@ -2450,6 +2466,8 @@ def _convert_arrays_and_get_rizer_klass(
24502466
# Invalid index type "type" for "Dict[Type[object], Type[Factorizer]]";
24512467
# expected type "Type[object]"
24522468
klass = _factorizers[lk.dtype.type] # type: ignore[index]
2469+
elif isinstance(lk.dtype, ArrowDtype):
2470+
klass = _factorizers[lk.dtype.numpy_dtype.type]
24532471
else:
24542472
klass = _factorizers[lk.dtype.type]
24552473

pandas/tests/extension/test_arrow.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import numpy as np
2828
import pytest
2929

30+
from pandas._libs import lib
3031
from pandas.compat import (
3132
PY311,
3233
is_ci_environment,
@@ -1676,6 +1677,23 @@ def test_to_numpy_int_with_na():
16761677
tm.assert_numpy_array_equal(result, expected)
16771678

16781679

1680+
@pytest.mark.parametrize("na_val, exp", [(lib.no_default, np.nan), (1, 1)])
1681+
def test_to_numpy_null_array(na_val, exp):
1682+
# GH#52443
1683+
arr = pd.array([pd.NA, pd.NA], dtype="null[pyarrow]")
1684+
result = arr.to_numpy(dtype="float64", na_value=na_val)
1685+
expected = np.array([exp] * 2, dtype="float64")
1686+
tm.assert_numpy_array_equal(result, expected)
1687+
1688+
1689+
def test_to_numpy_null_array_no_dtype():
1690+
# GH#52443
1691+
arr = pd.array([pd.NA, pd.NA], dtype="null[pyarrow]")
1692+
result = arr.to_numpy(dtype=None)
1693+
expected = np.array([pd.NA] * 2, dtype="object")
1694+
tm.assert_numpy_array_equal(result, expected)
1695+
1696+
16791697
def test_setitem_null_slice(data):
16801698
# GH50248
16811699
orig = data.copy()
@@ -2387,3 +2405,16 @@ def test_setitem_boolean_replace_with_mask_segfault():
23872405
expected = arr.copy()
23882406
arr[np.zeros((N,), dtype=np.bool_)] = False
23892407
assert arr._pa_array == expected._pa_array
2408+
2409+
2410+
@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES + tm.FLOAT_PYARROW_DTYPES)
2411+
def test_describe_numeric_data(pa_type):
2412+
# GH 52470
2413+
data = pd.Series([1, 2, 3], dtype=ArrowDtype(pa_type))
2414+
result = data.describe()
2415+
expected = pd.Series(
2416+
[3, 2, 1, 1, 1.5, 2.0, 2.5, 3],
2417+
dtype=ArrowDtype(pa.float64()),
2418+
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
2419+
)
2420+
tm.assert_series_equal(result, expected)

pandas/tests/reshape/merge/test_merge.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2761,3 +2761,18 @@ def test_merge_ea_and_non_ea(any_numeric_ea_dtype, join_type):
27612761
}
27622762
)
27632763
tm.assert_frame_equal(result, expected)
2764+
2765+
2766+
@pytest.mark.parametrize("dtype", ["int64", "int64[pyarrow]"])
2767+
def test_merge_arrow_and_numpy_dtypes(dtype):
2768+
# GH#52406
2769+
pytest.importorskip("pyarrow")
2770+
df = DataFrame({"a": [1, 2]}, dtype=dtype)
2771+
df2 = DataFrame({"a": [1, 2]}, dtype="int64[pyarrow]")
2772+
result = df.merge(df2)
2773+
expected = df.copy()
2774+
tm.assert_frame_equal(result, expected)
2775+
2776+
result = df2.merge(df)
2777+
expected = df2.copy()
2778+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)