
S3 parquet failures on Travis #22934

Closed
@TomAugspurger

Description

Edit: We've pinned to moto 1.3.4 for now, which seems to avoid the issue. Assuming this is a moto bug (which hasn't been verified), the remaining TODOs are:

  • Reproduce locally with moto 1.3.6 or higher
  • Construct a minimal test case not using pandas, and ideally not using s3fs (see the sketch after this list)
  • Report the problem upstream to moto
  • Unpin moto once it's fixed
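
One way to chase the second TODO would be a repro that only uses boto3 under moto's mock: a PUT followed by the same head_object call that s3fs' info() ends up making. This is just a sketch (bucket/key names and region are arbitrary, and it hasn't been confirmed that this actually reproduces the failure):

```python
import boto3
from moto import mock_s3

@mock_s3
def put_then_head():
    client = boto3.client("s3", region_name="us-east-1")
    client.create_bucket(Bucket="pandas-test")
    client.put_object(Bucket="pandas-test", Key="pyarrow.parquet",
                      Body=b"some bytes")
    # s3fs' info() boils down to a head_object call; in the Travis failure it
    # behaves as if the freshly written key doesn't exist.
    return client.head_object(Bucket="pandas-test", Key="pyarrow.parquet")

print(put_then_head())
```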

https://travis-ci.org/pandas-dev/pandas/jobs/435861714#L2506

=================================== FAILURES ===================================
_____________________ TestParquetPyArrow.test_s3_roundtrip _____________________
[gw0] linux2 -- Python 2.7.15 /home/travis/miniconda3/envs/pandas/bin/python
self = <pandas.tests.io.test_parquet.TestParquetPyArrow object at 0x7f3966d04910>
df_compat =    A    B
0  1  foo
1  2  foo
2  3  foo
s3_resource = s3.ServiceResource(), pa = 'pyarrow'
    def test_s3_roundtrip(self, df_compat, s3_resource, pa):
        # GH #19134
        check_round_trip(df_compat, pa,
>                        path='s3://pandas-test/pyarrow.parquet')
pandas/tests/io/test_parquet.py:474: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
pandas/tests/io/test_parquet.py:169: in check_round_trip
    compare(repeat)
pandas/tests/io/test_parquet.py:161: in compare
    actual = read_parquet(path, **read_kwargs)
pandas/io/parquet.py:303: in read_parquet
    return impl.read(path, columns=columns, **kwargs)
pandas/io/parquet.py:132: in read
    path, _, _, should_close = get_filepath_or_buffer(path)
pandas/io/common.py:216: in get_filepath_or_buffer
    mode=mode)
pandas/io/s3.py:38: in get_filepath_or_buffer
    filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
../../../miniconda3/envs/pandas/lib/python2.7/site-packages/s3fs/core.py:335: in open
    s3_additional_kwargs=kw)
../../../miniconda3/envs/pandas/lib/python2.7/site-packages/s3fs/core.py:1143: in __init__
    info = self.info()
../../../miniconda3/envs/pandas/lib/python2.7/site-packages/s3fs/core.py:1161: in info
    refresh=refresh, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
self = <s3fs.core.S3FileSystem object at 0x7f395fb5a890>
path = 'pandas-test/pyarrow.parquet', version_id = None, refresh = False
kwargs = {}, parent = 'pandas-test', bucket = 'pandas-test'
key = 'pyarrow.parquet'
    def info(self, path, version_id=None, refresh=False, **kwargs):
        """ Detail on the specific file pointed to by path.
    
            Gets details only for a specific key, directories/buckets cannot be
            used with info.
    
            Parameters
            ----------
            version_id : str, optional
                version of the key to perform the head_object on
            refresh : bool
                If true, don't look in the info cache
            """
        parent = path.rsplit('/', 1)[0]
    
        if not refresh:
            if path in self.dirs:
                files = self.dirs[path]
                if len(files) == 1:
                    return files[0]
            elif parent in self.dirs:
                for f in self.dirs[parent]:
                    if f['Key'] == path:
                        return f
    
        try:
            bucket, key = split_path(path)
            if version_id is not None:
                if not self.version_aware:
                    raise ValueError("version_id cannot be specified if the "
                                     "filesystem is not version aware")
                kwargs['VersionId'] = version_id
            out = self._call_s3(self.s3.head_object, kwargs, Bucket=bucket,
                                Key=key, **self.req_kw)
            out = {
                'ETag': out['ETag'],
                'Key': '/'.join([bucket, key]),
                'LastModified': out['LastModified'],
                'Size': out['ContentLength'],
                'StorageClass': "STANDARD",
                'VersionId': out.get('VersionId')
            }
            return out
        except (ClientError, ParamValidationError):
            logger.debug("Failed to head path %s", path, exc_info=True)
>           raise FileNotFoundError(path)
E           FileNotFoundError: pandas-test/pyarrow.parquet
../../../miniconda3/envs/pandas/lib/python2.7/site-packages/s3fs/core.py:478: FileNotFoundError
----------------------------- Captured stderr call -----------------------------
Exception requests.exceptions.ConnectionError: ConnectionError(u'Connection refused: PUT https://pandas-test.s3.amazonaws.com/pyarrow.parquet',) in <bound method S3File.__del__ of <S3File pandas-test/pyarrow.parquet>> ignored

Debugging now.
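
For local debugging, something along these lines should exercise the same code path as the failing test (a sketch, not the actual pandas test; assumes pyarrow, s3fs, and moto are installed):

```python
import boto3
import pandas as pd
from moto import mock_s3

@mock_s3
def roundtrip():
    # Mirror the s3_resource fixture: create the mock bucket the test writes to.
    boto3.resource("s3").create_bucket(Bucket="pandas-test")
    df = pd.DataFrame({"A": [1, 2, 3], "B": ["foo", "foo", "foo"]})
    df.to_parquet("s3://pandas-test/pyarrow.parquet", engine="pyarrow")
    return pd.read_parquet("s3://pandas-test/pyarrow.parquet", engine="pyarrow")

print(roundtrip())
```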
