Skip to content

Commit 1319766

Browse files
authored
use compression=None (again) to avoid inferring compression (#37909)
1 parent 463cd0a commit 1319766

File tree

4 files changed

+21
-4
lines changed

4 files changed

+21
-4
lines changed

doc/source/whatsnew/v1.2.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,7 @@ I/O
647647
- Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`)
648648
- :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`)
649649
- Bug in :meth:`read_fwf` was not skipping blank lines (even with ``skip_blank_lines=True``) (:issue:`37758`)
650+
- :meth:`read_fwf` was inferring compression with ``compression=None``, which was not consistent with the other ``read_*`` functions (:issue:`37909`)
650651

651652
Period
652653
^^^^^^

pandas/io/common.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,8 +468,11 @@ def infer_compression(
468468
------
469469
ValueError on invalid compression specified.
470470
"""
471+
if compression is None:
472+
return None
473+
471474
# Infer compression
472-
if compression in ("infer", None):
475+
if compression == "infer":
473476
# Convert all path types (e.g. pathlib.Path) to strings
474477
filepath_or_buffer = stringify_path(filepath_or_buffer)
475478
if not isinstance(filepath_or_buffer, str):

pandas/tests/io/parser/test_compression.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44
"""
55

66
import os
7+
from pathlib import Path
78
import zipfile
89

910
import pytest
1011

11-
import pandas as pd
12+
from pandas import DataFrame
1213
import pandas._testing as tm
1314

1415

@@ -130,7 +131,7 @@ def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding
130131
path = os.path.join(csv_dir_path, f"utf{utf_value}_ex_small.zip")
131132

132133
result = parser.read_csv(path, encoding=encoding, compression="zip", sep="\t")
133-
expected = pd.DataFrame(
134+
expected = DataFrame(
134135
{
135136
"Country": ["Venezuela", "Venezuela"],
136137
"Twitter": ["Hugo Chávez Frías", "Henrique Capriles R."],
@@ -149,3 +150,15 @@ def test_invalid_compression(all_parsers, invalid_compression):
149150

150151
with pytest.raises(ValueError, match=msg):
151152
parser.read_csv("test_file.zip", **compress_kwargs)
153+
154+
155+
def test_ignore_compression_extension(all_parsers):
156+
parser = all_parsers
157+
df = DataFrame({"a": [0, 1]})
158+
with tm.ensure_clean("test.csv") as path_csv:
159+
with tm.ensure_clean("test.csv.zip") as path_zip:
160+
# make sure to create un-compressed file with zip extension
161+
df.to_csv(path_csv, index=False)
162+
Path(path_zip).write_text(Path(path_csv).read_text())
163+
164+
tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)

pandas/tests/io/parser/test_read_fwf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ def test_default_delimiter():
638638
tm.assert_frame_equal(result, expected)
639639

640640

641-
@pytest.mark.parametrize("infer", [True, False, None])
641+
@pytest.mark.parametrize("infer", [True, False])
642642
def test_fwf_compression(compression_only, infer):
643643
data = """1111111111
644644
2222222222

0 commit comments

Comments
 (0)