Closed
Description
I'm using pyarrow 0.15.1 locally, and `test_additional_extension_arrays` fails with the traceback below.
_____________________________________________ TestParquetPyArrow.test_additional_extension_arrays _____________________________________________
self = <pandas.tests.io.test_parquet.TestParquetPyArrow object at 0x129bc4050>, pa = 'pyarrow'
@td.skip_if_no("pyarrow", min_version="0.15.0")
def test_additional_extension_arrays(self, pa):
# test additional ExtensionArrays that are supported through the
# __arrow_array__ protocol
df = pd.DataFrame(
{
"a": pd.Series([1, 2, 3], dtype="Int64"),
"b": pd.Series(["a", None, "c"], dtype="string"),
}
)
if LooseVersion(pyarrow.__version__) >= LooseVersion("0.15.1.dev"):
expected = df
else:
# de-serialized as plain int / object
expected = df.assign(a=df.a.astype("int64"), b=df.b.astype("object"))
> check_round_trip(df, pa, expected=expected)
pandas/tests/io/test_parquet.py:522:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/tests/io/test_parquet.py:175: in check_round_trip
compare(repeat)
pandas/tests/io/test_parquet.py:167: in compare
df.to_parquet(path, **write_kwargs)
pandas/core/frame.py:2061: in to_parquet
**kwargs,
pandas/io/parquet.py:255: in to_parquet
**kwargs,
pandas/io/parquet.py:101: in write
table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
pyarrow/table.pxi:1057: in pyarrow.lib.Table.from_pandas
???
/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py:555: in dataframe_to_arrays
for c, f in zip(columns_to_convert, convert_fields)]
/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py:555: in <listcomp>
for c, f in zip(columns_to_convert, convert_fields)]
/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py:546: in convert_column
raise e
/usr/local/lib/python3.7/site-packages/pyarrow/pandas_compat.py:540: in convert_column
result = pa.array(col, type=type_, from_pandas=True, safe=safe)
pyarrow/array.pxi:196: in pyarrow.lib.array
???
pyarrow/array.pxi:92: in pyarrow.lib._handle_arrow_array_protocol
???
pandas/core/arrays/string_.py:200: in __arrow_array__
return pa.array(self._ndarray, type=type, from_pandas=True)
pyarrow/array.pxi:207: in pyarrow.lib.array
???
pyarrow/array.pxi:78: in pyarrow.lib._ndarray_to_array
???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> ???
E pyarrow.lib.ArrowTypeError: ("Expected a string or bytes object, got a 'NAType' object", 'Conversion failed for column b with type string')