Skip to content

Commit 9a80160

Browse files
author
Krzysztof Chomski
committed
BUG: GH17778 - DataFrame.to_pickle() fails for .zip format.
GH17778: add 'zip' format to unittests. Added entry in doc/source/whatsnew/v0.22.0.txt file to Bug Fixes section.
1 parent 17e0b13 commit 9a80160

File tree

4 files changed

+27
-14
lines changed

4 files changed

+27
-14
lines changed

doc/source/whatsnew/v0.22.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ Bug Fixes
8989

9090
- Bug in ``pd.read_msgpack()`` when a non-existent file is passed in Python 2 (:issue:`15296`)
9191
- Bug in ``DataFrame.groupby`` where a tuple key in a ``MultiIndex`` was interpreted as a list of keys (:issue:`17979`)
92+
- Bug in ``DataFrame.to_pickle()`` where writing to the .zip format failed (:issue:`17778`)
9293
- Bug in :func:`pd.read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`)
9394

9495
Conversion

pandas/io/common.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -357,17 +357,20 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
357357
# ZIP Compression
358358
elif compression == 'zip':
359359
import zipfile
360-
zip_file = zipfile.ZipFile(path_or_buf)
361-
zip_names = zip_file.namelist()
362-
if len(zip_names) == 1:
363-
f = zip_file.open(zip_names.pop())
364-
elif len(zip_names) == 0:
365-
raise ValueError('Zero files found in ZIP file {}'
366-
.format(path_or_buf))
360+
if mode == 'wb':
361+
f = zipfile.ZipFile(path_or_buf, 'w')
367362
else:
368-
raise ValueError('Multiple files found in ZIP file.'
369-
' Only one file per ZIP: {}'
370-
.format(zip_names))
363+
zip_file = zipfile.ZipFile(path_or_buf)
364+
zip_names = zip_file.namelist()
365+
if len(zip_names) == 1:
366+
f = zip_file.open(zip_names.pop())
367+
elif len(zip_names) == 0:
368+
raise ValueError('Zero files found in ZIP file {}'
369+
.format(path_or_buf))
370+
else:
371+
raise ValueError('Multiple files found in ZIP file.'
372+
' Only one file per ZIP: {}'
373+
.format(zip_names))
371374

372375
# XZ Compression
373376
elif compression == 'xz':

pandas/io/pickle.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,15 @@ def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
4242
if protocol < 0:
4343
protocol = pkl.HIGHEST_PROTOCOL
4444
try:
45-
pkl.dump(obj, f, protocol=protocol)
45+
import zipfile
46+
if isinstance(f, zipfile.ZipFile):
47+
import tempfile
48+
tmp_file = tempfile.NamedTemporaryFile()
49+
pkl.dump(obj, tmp_file, protocol=protocol)
50+
tmp_file.seek(0)
51+
f.write(tmp_file.name)
52+
else:
53+
pkl.dump(obj, f, protocol=protocol)
4654
finally:
4755
for _f in fh:
4856
_f.close()

pandas/tests/io/test_pickle.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ def decompress_file(self, src_path, dest_path, compression):
382382
fh.write(f.read())
383383
f.close()
384384

385-
@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz'])
385+
@pytest.mark.parametrize('compression', [None, 'gzip', 'zip', 'bz2', 'xz'])
386386
def test_write_explicit(self, compression, get_random_path):
387387
# issue 11666
388388
if compression == 'xz':
@@ -414,7 +414,8 @@ def test_write_explicit_bad(self, compression, get_random_path):
414414
df = tm.makeDataFrame()
415415
df.to_pickle(path, compression=compression)
416416

417-
@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.no_compress'])
417+
@pytest.mark.parametrize('ext', ['', '.gz', '.zip', '.bz2', '.xz',
418+
'.no_compress'])
418419
def test_write_infer(self, ext, get_random_path):
419420
if ext == '.xz':
420421
tm._skip_if_no_lzma()
@@ -442,7 +443,7 @@ def test_write_infer(self, ext, get_random_path):
442443

443444
tm.assert_frame_equal(df, df2)
444445

445-
@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', "zip"])
446+
@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', 'zip'])
446447
def test_read_explicit(self, compression, get_random_path):
447448
# issue 11666
448449
if compression == 'xz':

0 commit comments

Comments
 (0)