Skip to content

BUG: Can't convert float64[pyarrow] back to NumPy float #56649

Closed
@mattharrison

Description

@mattharrison

Pandas version checks

  • I have checked that this issue has not already been reported.

  • I have confirmed this bug exists on the latest version of pandas.

  • I have confirmed this bug exists on the main branch of pandas.

Reproducible Example

It appears that the 2.2.0rc0 release candidate introduced a regression when converting pyarrow-typed floats back to NumPy ones. I do this conversion to get back the missing interpolation functionality.


>>> s = pd.Series([1.2, None, 3.2], dtype='float64[pyarrow]')
>>> s.astype(float)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[248], line 2
      1 s = pd.Series([1.2, None, 3.2], dtype='float64[pyarrow]')
----> 2 s.astype(float)

File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/generic.py:6637, in NDFrame.astype(self, dtype, copy, errors)
   6631     results = [
   6632         ser.astype(dtype, copy=copy, errors=errors) for _, ser in self.items()
   6633     ]
   6635 else:
   6636     # else, only a single dtype is given
-> 6637     new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
   6638     res = self._constructor_from_mgr(new_data, axes=new_data.axes)
   6639     return res.__finalize__(self, method="astype")

File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/internals/managers.py:431, in BaseBlockManager.astype(self, dtype, copy, errors)
    428 elif using_copy_on_write():
    429     copy = False
--> 431 return self.apply(
    432     "astype",
    433     dtype=dtype,
    434     copy=copy,
    435     errors=errors,
    436     using_cow=using_copy_on_write(),
    437 )

File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/internals/managers.py:364, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
    362         applied = b.apply(f, **kwargs)
    363     else:
--> 364         applied = getattr(b, f)(**kwargs)
    365     result_blocks = extend_blocks(applied, result_blocks)
    367 out = type(self).from_blocks(result_blocks, self.axes)

File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/internals/blocks.py:754, in Block.astype(self, dtype, copy, errors, using_cow, squeeze)
    751         raise ValueError("Can not squeeze with more than one column.")
    752     values = values[0, :]  # type: ignore[call-overload]
--> 754 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
    756 new_values = maybe_coerce_values(new_values)
    758 refs = None

File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:237, in astype_array_safe(values, dtype, copy, errors)
    234     dtype = dtype.numpy_dtype
    236 try:
--> 237     new_values = astype_array(values, dtype, copy=copy)
    238 except (ValueError, TypeError):
    239     # e.g. _astype_nansafe can fail on object-dtype of strings
    240     #  trying to convert to float
    241     if errors == "ignore":

File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:179, in astype_array(values, dtype, copy)
    175     return values
    177 if not isinstance(values, np.ndarray):
    178     # i.e. ExtensionArray
--> 179     values = values.astype(dtype, copy=copy)
    181 else:
    182     values = _astype_nansafe(values, dtype, copy=copy)

File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/arrays/base.py:721, in ExtensionArray.astype(self, dtype, copy)
    717     from pandas.core.arrays import TimedeltaArray
    719     return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
--> 721 return np.array(self, dtype=dtype, copy=copy)

File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/arrays/arrow/array.py:633, in ArrowExtensionArray.__array__(self, dtype)
    631 def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
    632     """Correctly construct numpy arrays when passed to `np.asarray()`."""
--> 633     return self.to_numpy(dtype=dtype)

File ~/.envs/pd22rc/lib/python3.11/site-packages/pandas/core/arrays/arrow/array.py:1363, in ArrowExtensionArray.to_numpy(self, dtype, copy, na_value)
   1361     result = np.empty(len(data), dtype=dtype)
   1362     mask = data.isna()
-> 1363     result[mask] = na_value
   1364     result[~mask] = data[~mask]._pa_array.to_numpy()
   1365 return result

TypeError: float() argument must be a string or a real number, not 'NAType'


### Issue Description

Prior to 2.2rc0, I could cast the type back to float (to get interpolate to function again).

### Expected Behavior

`s.astype(float)` should succeed, as it did prior to 2.2.0rc0, so that interpolation works on the result.

### Installed Versions

<details>

INSTALLED VERSIONS
------------------
commit                : d4c8d82b52045f49a0bb1d762968918a06886ae9
python                : 3.11.6.final.0
python-bits           : 64
OS                    : Darwin
OS-release            : 23.2.0
Version               : Darwin Kernel Version 23.2.0: Wed Nov 15 21:53:18 PST 2023; root:xnu-10002.61.3~2/RELEASE_ARM64_T6000
machine               : arm64
processor             : arm
byteorder             : little
LC_ALL                : en_US.UTF-8
LANG                  : None
LOCALE                : en_US.UTF-8

pandas                : 2.2.0rc0
numpy                 : 1.26.2
pytz                  : 2023.3.post1
dateutil              : 2.8.2
setuptools            : 68.2.2
pip                   : 23.3.1
Cython                : 3.0.7
pytest                : None
hypothesis            : None
sphinx                : None
blosc                 : None
feather               : None
xlsxwriter            : None
lxml.etree            : None
html5lib              : None
pymysql               : None
psycopg2              : None
jinja2                : 3.1.2
IPython               : 8.19.0
pandas_datareader     : None
adbc-driver-postgresql: None
adbc-driver-sqlite    : None
bs4                   : 4.12.2
bottleneck            : None
dataframe-api-compat  : None
fastparquet           : None
fsspec                : None
gcsfs                 : None
matplotlib            : 3.8.2
numba                 : 0.58.1
numexpr               : None
odfpy                 : None
openpyxl              : None
pandas_gbq            : None
pyarrow               : 14.0.2
pyreadstat            : None
python-calamine       : None
pyxlsb                : None
s3fs                  : None
scipy                 : None
sqlalchemy            : None
tables                : None
tabulate              : None
xarray                : None
xlrd                  : None
zstandard             : None
tzdata                : 2023.3
qtpy                  : None
pyqt5                 : None



</details>

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug; Needs Triage (Issue that has not been reviewed by a pandas team member)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions