Skip to content

Commit a4858ae

Browse files
committed
TST/CLN: fix ResourceWarning due to open sockets
1 parent e98032d commit a4858ae

File tree

6 files changed

+60
-10
lines changed

6 files changed

+60
-10
lines changed

pandas/io/excel.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,10 +399,11 @@ def __init__(self, io, **kwds):
399399

400400
# If io is a url, want to keep the data as bytes so can't pass
401401
# to get_filepath_or_buffer()
402+
should_close = False
402403
if _is_url(self._io):
403404
io = _urlopen(self._io)
404405
elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
405-
io, _, _, _ = get_filepath_or_buffer(self._io)
406+
io, _, _, should_close = get_filepath_or_buffer(self._io)
406407

407408
if engine == 'xlrd' and isinstance(io, xlrd.Book):
408409
self.book = io
@@ -425,6 +426,13 @@ def __init__(self, io, **kwds):
425426
raise ValueError('Must explicitly set engine if not passing in'
426427
' buffer or path for io.')
427428

429+
if should_close:
430+
try:
431+
io.close()
432+
except AttributeError:
433+
# io is not file-like (e.g. a string)
434+
pass
435+
428436
def __fspath__(self):
429437
return self._io
430438

pandas/io/parquet.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def write(self, df, path, compression='snappy',
104104
coerce_timestamps='ms', index=None, partition_cols=None,
105105
**kwargs):
106106
self.validate_dataframe(df)
107-
path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
107+
path, _, _, should_close = get_filepath_or_buffer(path, mode='wb')
108108

109109
if index is None:
110110
from_pandas_kwargs = {}
@@ -121,6 +121,13 @@ def write(self, df, path, compression='snappy',
121121
table, path, compression=compression,
122122
coerce_timestamps=coerce_timestamps, **kwargs)
123123

124+
if should_close:
125+
try:
126+
path.close()
127+
except AttributeError:
128+
# path is not file-like (e.g. a string)
129+
pass
130+
124131
def read(self, path, columns=None, **kwargs):
125132
path, _, _, should_close = get_filepath_or_buffer(path)
126133

@@ -130,7 +137,8 @@ def read(self, path, columns=None, **kwargs):
130137
if should_close:
131138
try:
132139
path.close()
133-
except: # noqa: flake8
140+
except AttributeError:
141+
# path is not file-like (e.g. a string)
134142
pass
135143

136144
return result
@@ -183,17 +191,24 @@ def write(self, df, path, compression='snappy', index=None,
183191
# path is s3:// so we need to open the s3file in 'wb' mode.
184192
# TODO: Support 'ab'
185193

186-
path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
194+
path, _, _, should_close = get_filepath_or_buffer(path, mode='wb')
187195
# And pass the opened s3file to the fastparquet internal impl.
188196
kwargs['open_with'] = lambda path, _: path
189197
else:
190-
path, _, _, _ = get_filepath_or_buffer(path)
198+
path, _, _, should_close = get_filepath_or_buffer(path)
191199

192200
with catch_warnings(record=True):
193201
self.api.write(path, df, compression=compression,
194202
write_index=index, partition_on=partition_cols,
195203
**kwargs)
196204

205+
if should_close:
206+
try:
207+
path.close()
208+
except AttributeError:
209+
# path is not file-like (e.g. a string)
210+
pass
211+
197212
def read(self, path, columns=None, **kwargs):
198213
if is_s3_url(path):
199214
# When path is s3:// an S3File is returned.
@@ -205,9 +220,16 @@ def read(self, path, columns=None, **kwargs):
205220
finally:
206221
s3.close()
207222
else:
208-
path, _, _, _ = get_filepath_or_buffer(path)
223+
path, _, _, should_close = get_filepath_or_buffer(path)
209224
parquet_file = self.api.ParquetFile(path)
210225

226+
if should_close:
227+
try:
228+
path.close()
229+
except (AttributeError, OSError):
230+
# path is not file-like (e.g. a string)
231+
pass
232+
211233
return parquet_file.to_pandas(columns=columns, **kwargs)
212234

213235

pandas/tests/io/conftest.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def salaries_table(datapath):
2121

2222

2323
@pytest.fixture
24-
def s3_resource(tips_file, jsonl_file):
24+
def s3_resource(capfd, tips_file, jsonl_file):
2525
"""Fixture for mocking S3 interaction.
2626
2727
The primary bucket name is "pandas-test". The following datasets
@@ -70,5 +70,12 @@ def add_tips_files(bucket_name):
7070
yield conn
7171
except: # noqa: flake8
7272
pytest.skip("failure to use s3 resource")
73+
else: # no exception happened above
74+
# this uses the built-in pytest fixture capfd, which captures both
75+
# stdout/stderr and also output from libraries/subprocesses to file
76+
# descriptors 1&2; see https://docs.pytest.org/en/latest/capture.html
77+
captured = capfd.readouterr()
78+
assert not captured.out
79+
assert not captured.err
7380
finally:
7481
s3.stop()

pandas/tests/io/json/test_compression.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def test_read_zipped_json(datapath):
3333

3434

3535
@td.skip_if_not_us_locale
36-
def test_with_s3_url(compression):
36+
def test_with_s3_url(compression, capfd):
3737
boto3 = pytest.importorskip('boto3')
3838
pytest.importorskip('s3fs')
3939
moto = pytest.importorskip('moto')
@@ -52,6 +52,10 @@ def test_with_s3_url(compression):
5252
compression=compression)
5353
assert_frame_equal(df, roundtripped_df)
5454

55+
out, err = capfd.readouterr()
56+
assert not out
57+
assert not err
58+
5559

5660
def test_lines_with_compression(compression):
5761

pandas/tests/io/test_excel.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -693,7 +693,7 @@ def test_read_from_http_url(self, ext):
693693

694694
@td.skip_if_no("s3fs")
695695
@td.skip_if_not_us_locale
696-
def test_read_from_s3_url(self, ext):
696+
def test_read_from_s3_url(self, ext, capfd):
697697
moto = pytest.importorskip("moto")
698698
boto3 = pytest.importorskip("boto3")
699699

@@ -711,6 +711,10 @@ def test_read_from_s3_url(self, ext):
711711
local_table = self.get_exceldf('test1', ext)
712712
tm.assert_frame_equal(url_table, local_table)
713713

714+
out, err = capfd.readouterr()
715+
assert not out
716+
assert not err
717+
714718
@pytest.mark.slow
715719
# ignore warning from old xlrd
716720
@pytest.mark.filterwarnings("ignore:This metho:PendingDeprecationWarning")

pandas/tests/io/test_s3.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def test_is_s3_url(self):
1212
assert not is_s3_url("s4://pandas/somethingelse.com")
1313

1414

15-
def test_streaming_s3_objects():
15+
def test_streaming_s3_objects(capfd):
1616
# GH17135
1717
# botocore gained iteration support in 1.10.47, can now be used in read_*
1818
pytest.importorskip('botocore', minversion='1.10.47')
@@ -25,3 +25,8 @@ def test_streaming_s3_objects():
2525
for el in data:
2626
body = StreamingBody(BytesIO(el), content_length=len(el))
2727
read_csv(body)
28+
body.close()
29+
30+
out, err = capfd.readouterr()
31+
assert not out
32+
assert not err

0 commit comments

Comments (0)