Closed
Description
Edit: We've pinned moto to 1.3.4 for now, which seems to avoid the failures. Assuming this is a bug in moto (which hasn't been verified yet), the remaining TODOs are:
- Reproduce locally with moto 1.3.6 or higher
- Construct a minimal test case that does not use pandas, and ideally does not use s3fs either
- Report the issue upstream to moto
- Unpin moto once it's fixed
https://travis-ci.org/pandas-dev/pandas/jobs/435861714#L2506
=================================== FAILURES ===================================
_____________________ TestParquetPyArrow.test_s3_roundtrip _____________________
[gw0] linux2 -- Python 2.7.15 /home/travis/miniconda3/envs/pandas/bin/python
self = <pandas.tests.io.test_parquet.TestParquetPyArrow object at 0x7f3966d04910>
df_compat = A B
0 1 foo
1 2 foo
2 3 foo
s3_resource = s3.ServiceResource(), pa = 'pyarrow'
def test_s3_roundtrip(self, df_compat, s3_resource, pa):
# GH #19134
check_round_trip(df_compat, pa,
> path='s3://pandas-test/pyarrow.parquet')
pandas/tests/io/test_parquet.py:474:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/tests/io/test_parquet.py:169: in check_round_trip
compare(repeat)
pandas/tests/io/test_parquet.py:161: in compare
actual = read_parquet(path, **read_kwargs)
pandas/io/parquet.py:303: in read_parquet
return impl.read(path, columns=columns, **kwargs)
pandas/io/parquet.py:132: in read
path, _, _, should_close = get_filepath_or_buffer(path)
pandas/io/common.py:216: in get_filepath_or_buffer
mode=mode)
pandas/io/s3.py:38: in get_filepath_or_buffer
filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
../../../miniconda3/envs/pandas/lib/python2.7/site-packages/s3fs/core.py:335: in open
s3_additional_kwargs=kw)
../../../miniconda3/envs/pandas/lib/python2.7/site-packages/s3fs/core.py:1143: in __init__
info = self.info()
../../../miniconda3/envs/pandas/lib/python2.7/site-packages/s3fs/core.py:1161: in info
refresh=refresh, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <s3fs.core.S3FileSystem object at 0x7f395fb5a890>
path = 'pandas-test/pyarrow.parquet', version_id = None, refresh = False
kwargs = {}, parent = 'pandas-test', bucket = 'pandas-test'
key = 'pyarrow.parquet'
def info(self, path, version_id=None, refresh=False, **kwargs):
""" Detail on the specific file pointed to by path.
Gets details only for a specific key, directories/buckets cannot be
used with info.
Parameters
----------
version_id : str, optional
version of the key to perform the head_object on
refresh : bool
If true, don't look in the info cache
"""
parent = path.rsplit('/', 1)[0]
if not refresh:
if path in self.dirs:
files = self.dirs[path]
if len(files) == 1:
return files[0]
elif parent in self.dirs:
for f in self.dirs[parent]:
if f['Key'] == path:
return f
try:
bucket, key = split_path(path)
if version_id is not None:
if not self.version_aware:
raise ValueError("version_id cannot be specified if the "
"filesystem is not version aware")
kwargs['VersionId'] = version_id
out = self._call_s3(self.s3.head_object, kwargs, Bucket=bucket,
Key=key, **self.req_kw)
out = {
'ETag': out['ETag'],
'Key': '/'.join([bucket, key]),
'LastModified': out['LastModified'],
'Size': out['ContentLength'],
'StorageClass': "STANDARD",
'VersionId': out.get('VersionId')
}
return out
except (ClientError, ParamValidationError):
logger.debug("Failed to head path %s", path, exc_info=True)
> raise FileNotFoundError(path)
E FileNotFoundError: pandas-test/pyarrow.parquet
../../../miniconda3/envs/pandas/lib/python2.7/site-packages/s3fs/core.py:478: FileNotFoundError
----------------------------- Captured stderr call -----------------------------
Exception requests.exceptions.ConnectionError: ConnectionError(u'Connection refused: PUT https://pandas-test.s3.amazonaws.com/pyarrow.parquet',) in <bound method S3File.__del__ of <S3File pandas-test/pyarrow.parquet>> ignored
Debugging now.