Closed
Description
Pandas version checks
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest version of pandas.
-
I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
It appears that the 2.2.0rc0 release candidate introduced a regression when converting pyarrow-typed floats back to numpy floats. I do this conversion to get back the missing interpolation functionality.
>>> s = pd.Series([1.2, None, 3.2], dtype='float64[pyarrow]')
>>> s.astype(float)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[248], line 2
1 s = pd.Series([1.2, None, 3.2], dtype='float64[pyarrow]')
----> 2 s.astype(float)
File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/generic.py:6637, in NDFrame.astype(self, dtype, copy, errors)
6631 results = [
6632 ser.astype(dtype, copy=copy, errors=errors) for _, ser in self.items()
6633 ]
6635 else:
6636 # else, only a single dtype is given
-> 6637 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
6638 res = self._constructor_from_mgr(new_data, axes=new_data.axes)
6639 return res.__finalize__(self, method="astype")
File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/internals/managers.py:431, in BaseBlockManager.astype(self, dtype, copy, errors)
428 elif using_copy_on_write():
429 copy = False
--> 431 return self.apply(
432 "astype",
433 dtype=dtype,
434 copy=copy,
435 errors=errors,
436 using_cow=using_copy_on_write(),
437 )
File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/internals/managers.py:364, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
362 applied = b.apply(f, **kwargs)
363 else:
--> 364 applied = getattr(b, f)(**kwargs)
365 result_blocks = extend_blocks(applied, result_blocks)
367 out = type(self).from_blocks(result_blocks, self.axes)
File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/internals/blocks.py:754, in Block.astype(self, dtype, copy, errors, using_cow, squeeze)
751 raise ValueError("Can not squeeze with more than one column.")
752 values = values[0, :] # type: ignore[call-overload]
--> 754 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
756 new_values = maybe_coerce_values(new_values)
758 refs = None
File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:237, in astype_array_safe(values, dtype, copy, errors)
234 dtype = dtype.numpy_dtype
236 try:
--> 237 new_values = astype_array(values, dtype, copy=copy)
238 except (ValueError, TypeError):
239 # e.g. _astype_nansafe can fail on object-dtype of strings
240 # trying to convert to float
241 if errors == "ignore":
File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:179, in astype_array(values, dtype, copy)
175 return values
177 if not isinstance(values, np.ndarray):
178 # i.e. ExtensionArray
--> 179 values = values.astype(dtype, copy=copy)
181 else:
182 values = _astype_nansafe(values, dtype, copy=copy)
File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/arrays/base.py:721, in ExtensionArray.astype(self, dtype, copy)
717 from pandas.core.arrays import TimedeltaArray
719 return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
--> 721 return np.array(self, dtype=dtype, copy=copy)
File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/arrays/arrow/array.py:633, in ArrowExtensionArray.__array__(self, dtype)
631 def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
632 """Correctly construct numpy arrays when passed to `np.asarray()`."""
--> 633 return self.to_numpy(dtype=dtype)
File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/arrays/arrow/array.py:1363, in ArrowExtensionArray.to_numpy(self, dtype, copy, na_value)
1361 result = np.empty(len(data), dtype=dtype)
1362 mask = data.isna()
-> 1363 result[mask] = na_value
1364 result[~mask] = data[~mask]._pa_array.to_numpy()
1365 return result
TypeError: float() argument must be a string or a real number, not 'NAType'
### Issue Description
Prior to 2.2.0rc0, I could cast the Series back to float (so that `interpolate` works again).
### Expected Behavior
`s.astype(float)` should succeed, as it did before 2.2.0rc0, so that interpolation works on the result.
### Installed Versions
<details>
INSTALLED VERSIONS
------------------
commit : d4c8d82b52045f49a0bb1d762968918a06886ae9
python : 3.11.6.final.0
python-bits : 64
OS : Darwin
OS-release : 23.2.0
Version : Darwin Kernel Version 23.2.0: Wed Nov 15 21:53:18 PST 2023; root:xnu-10002.61.3~2/RELEASE_ARM64_T6000
machine : arm64
processor : arm
byteorder : little
LC_ALL : en_US.UTF-8
LANG : None
LOCALE : en_US.UTF-8
pandas : 2.2.0rc0
numpy : 1.26.2
pytz : 2023.3.post1
dateutil : 2.8.2
setuptools : 68.2.2
pip : 23.3.1
Cython : 3.0.7
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 3.1.2
IPython : 8.19.0
pandas_datareader : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : 4.12.2
bottleneck : None
dataframe-api-compat : None
fastparquet : None
fsspec : None
gcsfs : None
matplotlib : 3.8.2
numba : 0.58.1
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : 14.0.2
pyreadstat : None
python-calamine : None
pyxlsb : None
s3fs : None
scipy : None
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
zstandard : None
tzdata : 2023.3
qtpy : None
pyqt5 : None
</details>