Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
ser = pd.Series([1,2,3,None,10], dtype='int64[pyarrow]')
ser.astype(float)
Issue Description
I'm trying to run some scikit-learn models that impute missing values. The transformers aren't happy with `<NA>`, so I'm trying to convert the pyarrow int columns to numpy float columns, but I get this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[54], [line 2](vscode-notebook-cell:?execution_count=54&line=2)
[1](vscode-notebook-cell:?execution_count=54&line=1) ser = pd.Series([1,2,3,None,10], dtype='int64[pyarrow]')
----> [2](vscode-notebook-cell:?execution_count=54&line=2) ser.astype(float)
File [~/.python/current/lib/python3.10/site-packages/pandas/core/generic.py:6637](~/.python/current/lib/python3.10/site-packages/pandas/core/generic.py:6637), in NDFrame.astype(self, dtype, copy, errors)
[6631](~/.python/current/lib/python3.10/site-packages/pandas/core/generic.py:6631) results = [
[6632](~/.python/current/lib/python3.10/site-packages/pandas/core/generic.py:6632) ser.astype(dtype, copy=copy, errors=errors) for _, ser in self.items()
[6633](~/.python/current/lib/python3.10/site-packages/pandas/core/generic.py:6633) ]
[6635](~/.python/current/lib/python3.10/site-packages/pandas/core/generic.py:6635) else:
[6636](~/.python/current/lib/python3.10/site-packages/pandas/core/generic.py:6636) # else, only a single dtype is given
-> [6637](~/.python/current/lib/python3.10/site-packages/pandas/core/generic.py:6637) new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
[6638](~/.python/current/lib/python3.10/site-packages/pandas/core/generic.py:6638) res = self._constructor_from_mgr(new_data, axes=new_data.axes)
[6639](~/.python/current/lib/python3.10/site-packages/pandas/core/generic.py:6639) return res.__finalize__(self, method="astype")
File [~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:431](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:431), in BaseBlockManager.astype(self, dtype, copy, errors)
[428](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:428) elif using_copy_on_write():
[429](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:429) copy = False
--> [431](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:431) return self.apply(
[432](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:432) "astype",
[433](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:433) dtype=dtype,
[434](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:434) copy=copy,
[435](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:435) errors=errors,
[436](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:436) using_cow=using_copy_on_write(),
[437](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:437) )
File [~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:364](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:364), in BaseBlockManager.apply(self, f, align_keys, **kwargs)
[362](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:362) applied = b.apply(f, **kwargs)
[363](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:363) else:
--> [364](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:364) applied = getattr(b, f)(**kwargs)
[365](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:365) result_blocks = extend_blocks(applied, result_blocks)
[367](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/managers.py:367) out = type(self).from_blocks(result_blocks, self.axes)
File [~/.python/current/lib/python3.10/site-packages/pandas/core/internals/blocks.py:758](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/blocks.py:758), in Block.astype(self, dtype, copy, errors, using_cow, squeeze)
[755](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/blocks.py:755) raise ValueError("Can not squeeze with more than one column.")
[756](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/blocks.py:756) values = values[0, :] # type: ignore[call-overload]
--> [758](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/blocks.py:758) new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
[760](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/blocks.py:760) new_values = maybe_coerce_values(new_values)
[762](~/.python/current/lib/python3.10/site-packages/pandas/core/internals/blocks.py:762) refs = None
File [~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:237](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:237), in astype_array_safe(values, dtype, copy, errors)
[234](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:234) dtype = dtype.numpy_dtype
[236](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:236) try:
--> [237](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:237) new_values = astype_array(values, dtype, copy=copy)
[238](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:238) except (ValueError, TypeError):
[239](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:239) # e.g. _astype_nansafe can fail on object-dtype of strings
[240](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:240) # trying to convert to float
[241](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:241) if errors == "ignore":
File [~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:179](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:179), in astype_array(values, dtype, copy)
[175](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:175) return values
[177](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:177) if not isinstance(values, np.ndarray):
[178](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:178) # i.e. ExtensionArray
--> [179](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:179) values = values.astype(dtype, copy=copy)
[181](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:181) else:
[182](~/.python/current/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:182) values = _astype_nansafe(values, dtype, copy=copy)
File [~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/base.py:722](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/base.py:722), in ExtensionArray.astype(self, dtype, copy)
[718](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/base.py:718) from pandas.core.arrays import TimedeltaArray
[720](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/base.py:720) return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
--> [722](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/base.py:722) return np.array(self, dtype=dtype, copy=copy)
File [~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/arrow/array.py:661](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/arrow/array.py:661), in ArrowExtensionArray.__array__(self, dtype)
[659](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/arrow/array.py:659) def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
[660](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/arrow/array.py:660) """Correctly construct numpy arrays when passed to `np.asarray()`."""
--> [661](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/arrow/array.py:661) return self.to_numpy(dtype=dtype)
File [~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/arrow/array.py:1406](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/arrow/array.py:1406), in ArrowExtensionArray.to_numpy(self, dtype, copy, na_value)
[1404](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/arrow/array.py:1404) result = np.empty(len(data), dtype=dtype)
[1405](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/arrow/array.py:1405) mask = data.isna()
-> [1406](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/arrow/array.py:1406) result[mask] = na_value
[1407](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/arrow/array.py:1407) result[~mask] = data[~mask]._pa_array.to_numpy()
[1408](~/.python/current/lib/python3.10/site-packages/pandas/core/arrays/arrow/array.py:1408) return result
TypeError: float() argument must be a string or a real number, not 'NAType'
Expected Behavior
The conversion to float should succeed instead of raising a TypeError. (I'm pretty sure this worked in pandas 2.1.)
Installed Versions
INSTALLED VERSIONS
commit : fd3f571
python : 3.10.13.final.0
python-bits : 64
OS : Linux
OS-release : 6.2.0-1018-azure
Version : #18~22.04.1-Ubuntu SMP Tue Nov 21 19:25:02 UTC 2023
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : C.UTF-8
LOCALE : en_US.UTF-8
pandas : 2.2.0
numpy : 1.26.3
pytz : 2023.3.post1
dateutil : 2.8.2
setuptools : 68.2.2
pip : 23.3.2
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 3.1.2
IPython : 8.20.0
pandas_datareader : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : 4.12.2
bottleneck : None
dataframe-api-compat : None
fastparquet : None
fsspec : 2023.12.2
gcsfs : None
matplotlib : 3.8.2
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : 15.0.0
pyreadstat : None
python-calamine : None
pyxlsb : None
s3fs : None
scipy : 1.11.4
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
zstandard : None
tzdata : 2023.4
qtpy : None
pyqt5 : None