Skip to content

Commit a4858ae

Browse files
committed
TST/CLN: fix ResourceWarning due to open sockets
1 parent e98032d commit a4858ae

File tree

6 files changed

+60
-10
lines changed

6 files changed

+60
-10
lines changed

pandas/io/excel.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,10 +399,11 @@ def __init__(self, io, **kwds):
399399

400400
# If io is a url, want to keep the data as bytes so can't pass
401401
# to get_filepath_or_buffer()
402+
should_close = False
402403
if _is_url(self._io):
403404
io = _urlopen(self._io)
404405
elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
405-
io, _, _, _ = get_filepath_or_buffer(self._io)
406+
io, _, _, should_close = get_filepath_or_buffer(self._io)
406407

407408
if engine == 'xlrd' and isinstance(io, xlrd.Book):
408409
self.book = io
@@ -425,6 +426,13 @@ def __init__(self, io, **kwds):
425426
raise ValueError('Must explicitly set engine if not passing in'
426427
' buffer or path for io.')
427428

429+
if should_close:
430+
try:
431+
io.close()
432+
except AttributeError:
433+
# io is not file-like (e.g. a string)
434+
pass
435+
428436
def __fspath__(self):
429437
return self._io
430438

pandas/io/parquet.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def write(self, df, path, compression='snappy',
104104
coerce_timestamps='ms', index=None, partition_cols=None,
105105
**kwargs):
106106
self.validate_dataframe(df)
107-
path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
107+
path, _, _, should_close = get_filepath_or_buffer(path, mode='wb')
108108

109109
if index is None:
110110
from_pandas_kwargs = {}
@@ -121,6 +121,13 @@ def write(self, df, path, compression='snappy',
121121
table, path, compression=compression,
122122
coerce_timestamps=coerce_timestamps, **kwargs)
123123

124+
if should_close:
125+
try:
126+
path.close()
127+
except AttributeError:
128+
# path is not file-like (e.g. a string)
129+
pass
130+
124131
def read(self, path, columns=None, **kwargs):
125132
path, _, _, should_close = get_filepath_or_buffer(path)
126133

@@ -130,7 +137,8 @@ def read(self, path, columns=None, **kwargs):
130137
if should_close:
131138
try:
132139
path.close()
133-
except: # noqa: flake8
140+
except AttributeError:
141+
# path is not file-like (e.g. a string)
134142
pass
135143

136144
return result
@@ -183,17 +191,24 @@ def write(self, df, path, compression='snappy', index=None,
183191
# path is s3:// so we need to open the s3file in 'wb' mode.
184192
# TODO: Support 'ab'
185193

186-
path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
194+
path, _, _, should_close = get_filepath_or_buffer(path, mode='wb')
187195
# And pass the opened s3file to the fastparquet internal impl.
188196
kwargs['open_with'] = lambda path, _: path
189197
else:
190-
path, _, _, _ = get_filepath_or_buffer(path)
198+
path, _, _, should_close = get_filepath_or_buffer(path)
191199

192200
with catch_warnings(record=True):
193201
self.api.write(path, df, compression=compression,
194202
write_index=index, partition_on=partition_cols,
195203
**kwargs)
196204

205+
if should_close:
206+
try:
207+
path.close()
208+
except AttributeError:
209+
# path is not file-like (e.g. a string)
210+
pass
211+
197212
def read(self, path, columns=None, **kwargs):
198213
if is_s3_url(path):
199214
# When path is s3:// an S3File is returned.
@@ -205,9 +220,16 @@ def read(self, path, columns=None, **kwargs):
205220
finally:
206221
s3.close()
207222
else:
208-
path, _, _, _ = get_filepath_or_buffer(path)
223+
path, _, _, should_close = get_filepath_or_buffer(path)
209224
parquet_file = self.api.ParquetFile(path)
210225

226+
if should_close:
227+
try:
228+
path.close()
229+
except (AttributeError, OSError):
230+
# path is not file-like (e.g. a string)
231+
pass
232+
211233
return parquet_file.to_pandas(columns=columns, **kwargs)
212234

213235

pandas/tests/io/conftest.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def salaries_table(datapath):
2121

2222

2323
@pytest.fixture
24-
def s3_resource(tips_file, jsonl_file):
24+
def s3_resource(capfd, tips_file, jsonl_file):
2525
"""Fixture for mocking S3 interaction.
2626
2727
The primary bucket name is "pandas-test". The following datasets
@@ -70,5 +70,12 @@ def add_tips_files(bucket_name):
7070
yield conn
7171
except: # noqa: flake8
7272
pytest.skip("failure to use s3 resource")
73+
else: # no exception happened above
74+
# this uses the built-in pytest fixture capfd, which captures both
75+
# stdout/stderr and also output from libraries/subprocesses to file
76+
# descriptors 1&2; see https://docs.pytest.org/en/latest/capture.html
77+
captured = capfd.readouterr()
78+
assert not captured.out
79+
assert not captured.err
7380
finally:
7481
s3.stop()

pandas/tests/io/json/test_compression.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def test_read_zipped_json(datapath):
3333

3434

3535
@td.skip_if_not_us_locale
36-
def test_with_s3_url(compression):
36+
def test_with_s3_url(compression, capfd):
3737
boto3 = pytest.importorskip('boto3')
3838
pytest.importorskip('s3fs')
3939
moto = pytest.importorskip('moto')
@@ -52,6 +52,10 @@ def test_with_s3_url(compression):
5252
compression=compression)
5353
assert_frame_equal(df, roundtripped_df)
5454

55+
out, err = capfd.readouterr()
56+
assert not out
57+
assert not err
58+
5559

5660
def test_lines_with_compression(compression):
5761

pandas/tests/io/test_excel.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -693,7 +693,7 @@ def test_read_from_http_url(self, ext):
693693

694694
@td.skip_if_no("s3fs")
695695
@td.skip_if_not_us_locale
696-
def test_read_from_s3_url(self, ext):
696+
def test_read_from_s3_url(self, ext, capfd):
697697
moto = pytest.importorskip("moto")
698698
boto3 = pytest.importorskip("boto3")
699699

@@ -711,6 +711,10 @@ def test_read_from_s3_url(self, ext):
711711
local_table = self.get_exceldf('test1', ext)
712712
tm.assert_frame_equal(url_table, local_table)
713713

714+
out, err = capfd.readouterr()
715+
assert not out
716+
assert not err
717+
714718
@pytest.mark.slow
715719
# ignore warning from old xlrd
716720
@pytest.mark.filterwarnings("ignore:This metho:PendingDeprecationWarning")

pandas/tests/io/test_s3.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def test_is_s3_url(self):
1212
assert not is_s3_url("s4://pandas/somethingelse.com")
1313

1414

15-
def test_streaming_s3_objects():
15+
def test_streaming_s3_objects(capfd):
1616
# GH17135
1717
# botocore gained iteration support in 1.10.47, can now be used in read_*
1818
pytest.importorskip('botocore', minversion='1.10.47')
@@ -25,3 +25,8 @@ def test_streaming_s3_objects():
2525
for el in data:
2626
body = StreamingBody(BytesIO(el), content_length=len(el))
2727
read_csv(body)
28+
body.close()
29+
30+
out, err = capfd.readouterr()
31+
assert not out
32+
assert not err

0 commit comments

Comments (0)