Skip to content

Commit 89ddd8a

Browse files
authored
REGR: errors='replace' when encoding/errors are not specified (#38997)
1 parent 1231e2c commit 89ddd8a

File tree

3 files changed

+13
-2
lines changed

3 files changed

+13
-2
lines changed

doc/source/whatsnew/v1.2.1.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Fixed regressions
2424
- Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`)
2525
- Bug in :meth:`read_csv` with ``float_precision="high"`` caused segfault or wrong parsing of long exponent strings. This resulted in a regression in some cases as the default for ``float_precision`` was changed in pandas 1.2.0 (:issue:`38753`)
2626
- Fixed regression in :meth:`Rolling.skew` and :meth:`Rolling.kurt` modifying the object inplace (:issue:`38908`)
27+
- Fixed regression in :meth:`read_csv` and other read functions where the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`)
2728

2829
.. ---------------------------------------------------------------------------
2930

pandas/io/common.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -553,8 +553,7 @@ def get_handle(
553553
Returns the dataclass IOHandles
554554
"""
555555
# Windows does not default to utf-8. Set to utf-8 for a consistent behavior
556-
if encoding is None:
557-
encoding = "utf-8"
556+
encoding_passed, encoding = encoding, encoding or "utf-8"
558557

559558
# read_csv does not know whether the buffer is opened in binary/text mode
560559
if _is_binary_mode(path_or_buf, mode) and "b" not in mode:
@@ -641,6 +640,9 @@ def get_handle(
641640
# Check whether the filename is to be opened in binary mode.
642641
# Binary mode does not support 'encoding' and 'newline'.
643642
if ioargs.encoding and "b" not in ioargs.mode:
643+
if errors is None and encoding_passed is None:
644+
# fall back to errors="replace" when neither encoding nor errors was specified
645+
errors = "replace"
644646
# Encoding
645647
handle = open(
646648
handle,

pandas/tests/io/test_common.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,3 +419,11 @@ def test_is_fsspec_url():
419419
assert not icom.is_fsspec_url("random:pandas/somethingelse.com")
420420
assert not icom.is_fsspec_url("/local/path")
421421
assert not icom.is_fsspec_url("relative/local/path")
422+
423+
424+
def test_default_errors():
425+
# GH 38989
426+
with tm.ensure_clean() as path:
427+
file = Path(path)
428+
file.write_bytes(b"\xe4\na\n1")
429+
tm.assert_frame_equal(pd.read_csv(file, skiprows=[0]), pd.DataFrame({"a": [1]}))

0 commit comments

Comments
 (0)