Skip to content

test_additional_extension_arrays fails with pd.NA #29976

Closed
@jbrockmendel

Description

@jbrockmendel

cc @jorisvandenbossche

I'm using pyarrow 0.15.1 locally.

_____________________________________________ TestParquetPyArrow.test_additional_extension_arrays _____________________________________________

self = <pandas.tests.io.test_parquet.TestParquetPyArrow object at 0x129bc4050>, pa = 'pyarrow'

    @td.skip_if_no("pyarrow", min_version="0.15.0")
    def test_additional_extension_arrays(self, pa):
        # test additional ExtensionArrays that are supported through the
        # __arrow_array__ protocol
        df = pd.DataFrame(
            {
                "a": pd.Series([1, 2, 3], dtype="Int64"),
                "b": pd.Series(["a", None, "c"], dtype="string"),
            }
        )
        if LooseVersion(pyarrow.__version__) >= LooseVersion("0.15.1.dev"):
            expected = df
        else:
            # de-serialized as plain int / object
            expected = df.assign(a=df.a.astype("int64"), b=df.b.astype("object"))
>       check_round_trip(df, pa, expected=expected)

pandas/tests/io/test_parquet.py:522: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/tests/io/test_parquet.py:175: in check_round_trip
    compare(repeat)
pandas/tests/io/test_parquet.py:167: in compare
    df.to_parquet(path, **write_kwargs)
pandas/core/frame.py:2061: in to_parquet
    **kwargs,
pandas/io/parquet.py:255: in to_parquet
    **kwargs,
pandas/io/parquet.py:101: in write
    table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
pyarrow/table.pxi:1057: in pyarrow.lib.Table.from_pandas
    ???
/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py:555: in dataframe_to_arrays
    for c, f in zip(columns_to_convert, convert_fields)]
/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py:555: in <listcomp>
    for c, f in zip(columns_to_convert, convert_fields)]
/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py:546: in convert_column
    raise e
/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py:540: in convert_column
    result = pa.array(col, type=type_, from_pandas=True, safe=safe)
pyarrow/array.pxi:196: in pyarrow.lib.array
    ???
pyarrow/array.pxi:92: in pyarrow.lib._handle_arrow_array_protocol
    ???
pandas/core/arrays/string_.py:200: in __arrow_array__
    return pa.array(self._ndarray, type=type, from_pandas=True)
pyarrow/array.pxi:207: in pyarrow.lib.array
    ???
pyarrow/array.pxi:78: in pyarrow.lib._ndarray_to_array
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

>   ???
E   pyarrow.lib.ArrowTypeError: ("Expected a string or bytes object, got a 'NAType' object", 'Conversion failed for column b with type string')

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug · ExtensionArray (Extending pandas with custom dtypes or arrays.)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions