Skip to content

Commit eb9e823

Browse files
reidy-p authored and gfyoung committed
TST: Clean up pickle compression tests (#19350)
* TST: Clean up pickle compression tests
* create compression_no_zip fixture
1 parent 7202635 commit eb9e823

File tree

7 files changed

+71
-80
lines changed

7 files changed

+71
-80
lines changed

pandas/conftest.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import numpy
55
import pandas
66
import dateutil
7+
import pandas.util._test_decorators as td
78

89

910
def pytest_addoption(parser):
@@ -73,3 +74,22 @@ def ip():
7374
is_dateutil_gt_261 = pytest.mark.skipif(
7475
LooseVersion(dateutil.__version__) <= LooseVersion('2.6.1'),
7576
reason="dateutil stable version")
77+
78+
79+
@pytest.fixture(params=[None, 'gzip', 'bz2', 'zip',
80+
pytest.param('xz', marks=td.skip_if_no_lzma)])
81+
def compression(request):
82+
"""
83+
Fixture for trying common compression types in compression tests
84+
"""
85+
return request.param
86+
87+
88+
@pytest.fixture(params=[None, 'gzip', 'bz2',
89+
pytest.param('xz', marks=td.skip_if_no_lzma)])
90+
def compression_no_zip(request):
91+
"""
92+
Fixture for trying common compression types in compression tests
93+
except zip
94+
"""
95+
return request.param

pandas/tests/conftest.py

Lines changed: 0 additions & 11 deletions
This file was deleted.

pandas/tests/frame/test_to_csv.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -919,27 +919,28 @@ def test_to_csv_path_is_none(self):
919919
recons = pd.read_csv(StringIO(csv_str), index_col=0)
920920
assert_frame_equal(self.frame, recons)
921921

922-
def test_to_csv_compression(self, compression):
922+
def test_to_csv_compression(self, compression_no_zip):
923923

924924
df = DataFrame([[0.123456, 0.234567, 0.567567],
925925
[12.32112, 123123.2, 321321.2]],
926926
index=['A', 'B'], columns=['X', 'Y', 'Z'])
927927

928928
with ensure_clean() as filename:
929929

930-
df.to_csv(filename, compression=compression)
930+
df.to_csv(filename, compression=compression_no_zip)
931931

932932
# test the round trip - to_csv -> read_csv
933-
rs = read_csv(filename, compression=compression, index_col=0)
933+
rs = read_csv(filename, compression=compression_no_zip,
934+
index_col=0)
934935
assert_frame_equal(df, rs)
935936

936937
# explicitly make sure file is compressed
937-
with tm.decompress_file(filename, compression) as fh:
938+
with tm.decompress_file(filename, compression_no_zip) as fh:
938939
text = fh.read().decode('utf8')
939940
for col in df.columns:
940941
assert col in text
941942

942-
with tm.decompress_file(filename, compression) as fh:
943+
with tm.decompress_file(filename, compression_no_zip) as fh:
943944
assert_frame_equal(df, read_csv(fh, index_col=0))
944945

945946
def test_to_csv_compression_value_error(self):

pandas/tests/io/json/test_compression.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,18 @@
55
from pandas.util.testing import assert_frame_equal, assert_raises_regex
66

77

8-
def test_compression_roundtrip(compression):
8+
def test_compression_roundtrip(compression_no_zip):
99
df = pd.DataFrame([[0.123456, 0.234567, 0.567567],
1010
[12.32112, 123123.2, 321321.2]],
1111
index=['A', 'B'], columns=['X', 'Y', 'Z'])
1212

1313
with tm.ensure_clean() as path:
14-
df.to_json(path, compression=compression)
15-
assert_frame_equal(df, pd.read_json(path, compression=compression))
14+
df.to_json(path, compression=compression_no_zip)
15+
assert_frame_equal(df, pd.read_json(path,
16+
compression=compression_no_zip))
1617

1718
# explicitly ensure file was compressed.
18-
with tm.decompress_file(path, compression) as fh:
19+
with tm.decompress_file(path, compression_no_zip) as fh:
1920
result = fh.read().decode('utf8')
2021
assert_frame_equal(df, pd.read_json(result))
2122

@@ -40,7 +41,7 @@ def test_read_zipped_json():
4041
assert_frame_equal(uncompressed_df, compressed_df)
4142

4243

43-
def test_with_s3_url(compression):
44+
def test_with_s3_url(compression_no_zip):
4445
boto3 = pytest.importorskip('boto3')
4546
pytest.importorskip('s3fs')
4647
moto = pytest.importorskip('moto')
@@ -51,31 +52,36 @@ def test_with_s3_url(compression):
5152
bucket = conn.create_bucket(Bucket="pandas-test")
5253

5354
with tm.ensure_clean() as path:
54-
df.to_json(path, compression=compression)
55+
df.to_json(path, compression=compression_no_zip)
5556
with open(path, 'rb') as f:
5657
bucket.put_object(Key='test-1', Body=f)
5758

5859
roundtripped_df = pd.read_json('s3://pandas-test/test-1',
59-
compression=compression)
60+
compression=compression_no_zip)
6061
assert_frame_equal(df, roundtripped_df)
6162

6263

63-
def test_lines_with_compression(compression):
64+
def test_lines_with_compression(compression_no_zip):
65+
6466
with tm.ensure_clean() as path:
6567
df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
66-
df.to_json(path, orient='records', lines=True, compression=compression)
68+
df.to_json(path, orient='records', lines=True,
69+
compression=compression_no_zip)
6770
roundtripped_df = pd.read_json(path, lines=True,
68-
compression=compression)
71+
compression=compression_no_zip)
6972
assert_frame_equal(df, roundtripped_df)
7073

7174

72-
def test_chunksize_with_compression(compression):
75+
def test_chunksize_with_compression(compression_no_zip):
76+
7377
with tm.ensure_clean() as path:
7478
df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
75-
df.to_json(path, orient='records', lines=True, compression=compression)
79+
df.to_json(path, orient='records', lines=True,
80+
compression=compression_no_zip)
7681

77-
roundtripped_df = pd.concat(pd.read_json(path, lines=True, chunksize=1,
78-
compression=compression))
82+
res = pd.read_json(path, lines=True, chunksize=1,
83+
compression=compression_no_zip)
84+
roundtripped_df = pd.concat(res)
7985
assert_frame_equal(df, roundtripped_df)
8086

8187

pandas/tests/io/test_pickle.py

Lines changed: 8 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -352,42 +352,7 @@ def compress_file(self, src_path, dest_path, compression):
352352
f.write(fh.read())
353353
f.close()
354354

355-
def decompress_file(self, src_path, dest_path, compression):
356-
if compression is None:
357-
shutil.copyfile(src_path, dest_path)
358-
return
359-
360-
if compression == 'gzip':
361-
import gzip
362-
f = gzip.open(src_path, "r")
363-
elif compression == 'bz2':
364-
import bz2
365-
f = bz2.BZ2File(src_path, "r")
366-
elif compression == 'zip':
367-
import zipfile
368-
zip_file = zipfile.ZipFile(src_path)
369-
zip_names = zip_file.namelist()
370-
if len(zip_names) == 1:
371-
f = zip_file.open(zip_names.pop())
372-
else:
373-
raise ValueError('ZIP file {} error. Only one file per ZIP.'
374-
.format(src_path))
375-
elif compression == 'xz':
376-
lzma = pandas.compat.import_lzma()
377-
f = lzma.LZMAFile(src_path, "r")
378-
else:
379-
msg = 'Unrecognized compression type: {}'.format(compression)
380-
raise ValueError(msg)
381-
382-
with open(dest_path, "wb") as fh:
383-
fh.write(f.read())
384-
f.close()
385-
386-
@pytest.mark.parametrize('compression', [
387-
None, 'gzip', 'bz2',
388-
pytest.param('xz', marks=td.skip_if_no_lzma) # issue 11666
389-
])
390-
def test_write_explicit(self, compression, get_random_path):
355+
def test_write_explicit(self, compression_no_zip, get_random_path):
391356
base = get_random_path
392357
path1 = base + ".compressed"
393358
path2 = base + ".raw"
@@ -396,10 +361,12 @@ def test_write_explicit(self, compression, get_random_path):
396361
df = tm.makeDataFrame()
397362

398363
# write to compressed file
399-
df.to_pickle(p1, compression=compression)
364+
df.to_pickle(p1, compression=compression_no_zip)
400365

401366
# decompress
402-
self.decompress_file(p1, p2, compression=compression)
367+
with tm.decompress_file(p1, compression=compression_no_zip) as f:
368+
with open(p2, "wb") as fh:
369+
fh.write(f.read())
403370

404371
# read decompressed file
405372
df2 = pd.read_pickle(p2, compression=None)
@@ -435,17 +402,15 @@ def test_write_infer(self, ext, get_random_path):
435402
df.to_pickle(p1)
436403

437404
# decompress
438-
self.decompress_file(p1, p2, compression=compression)
405+
with tm.decompress_file(p1, compression=compression) as f:
406+
with open(p2, "wb") as fh:
407+
fh.write(f.read())
439408

440409
# read decompressed file
441410
df2 = pd.read_pickle(p2, compression=None)
442411

443412
tm.assert_frame_equal(df, df2)
444413

445-
@pytest.mark.parametrize('compression', [
446-
None, 'gzip', 'bz2', "zip",
447-
pytest.param('xz', marks=td.skip_if_no_lzma)
448-
])
449414
def test_read_explicit(self, compression, get_random_path):
450415
base = get_random_path
451416
path1 = base + ".raw"

pandas/tests/series/test_io.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -138,28 +138,29 @@ def test_to_csv_path_is_none(self):
138138
csv_str = s.to_csv(path=None)
139139
assert isinstance(csv_str, str)
140140

141-
def test_to_csv_compression(self, compression):
141+
def test_to_csv_compression(self, compression_no_zip):
142142

143143
s = Series([0.123456, 0.234567, 0.567567], index=['A', 'B', 'C'],
144144
name='X')
145145

146146
with ensure_clean() as filename:
147147

148-
s.to_csv(filename, compression=compression, header=True)
148+
s.to_csv(filename, compression=compression_no_zip, header=True)
149149

150150
# test the round trip - to_csv -> read_csv
151-
rs = pd.read_csv(filename, compression=compression, index_col=0,
152-
squeeze=True)
151+
rs = pd.read_csv(filename, compression=compression_no_zip,
152+
index_col=0, squeeze=True)
153153
assert_series_equal(s, rs)
154154

155155
# explicitly ensure file was compressed
156-
with tm.decompress_file(filename, compression=compression) as fh:
156+
with tm.decompress_file(filename, compression_no_zip) as fh:
157157
text = fh.read().decode('utf8')
158158
assert s.name in text
159159

160-
with tm.decompress_file(filename, compression=compression) as fh:
160+
with tm.decompress_file(filename, compression_no_zip) as fh:
161161
assert_series_equal(s, pd.read_csv(fh,
162-
index_col=0, squeeze=True))
162+
index_col=0,
163+
squeeze=True))
163164

164165

165166
class TestSeriesIO(TestData):

pandas/util/testing.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,15 @@ def decompress_file(path, compression):
191191
elif compression == 'xz':
192192
lzma = compat.import_lzma()
193193
f = lzma.LZMAFile(path, 'rb')
194+
elif compression == 'zip':
195+
import zipfile
196+
zip_file = zipfile.ZipFile(path)
197+
zip_names = zip_file.namelist()
198+
if len(zip_names) == 1:
199+
f = zip_file.open(zip_names.pop())
200+
else:
201+
raise ValueError('ZIP file {} error. Only one file per ZIP.'
202+
.format(path))
194203
else:
195204
msg = 'Unrecognized compression type: {}'.format(compression)
196205
raise ValueError(msg)

0 commit comments

Comments
 (0)