API: Rename CParserError to ParserError #14479

Merged
4 changes: 2 additions & 2 deletions doc/source/io.rst
@@ -1165,8 +1165,8 @@ too many will cause an error by default:

In [28]: pd.read_csv(StringIO(data))
---------------------------------------------------------------------------
CParserError Traceback (most recent call last)
CParserError: Error tokenizing data. C error: Expected 3 fields in line 3, saw 4
ParserError Traceback (most recent call last)
ParserError: Error tokenizing data. C error: Expected 3 fields in line 3, saw 4

You can elect to skip bad lines:

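For readers skimming this hunk, a minimal sketch of how the renamed exception surfaces to user code (the import path is the one this PR uses, `pandas.io.common`, which may change in later releases; the sample data is illustrative):

```python
import pandas as pd
from pandas.compat import StringIO          # py2/py3 StringIO, as used in the pandas test suite
from pandas.io.common import ParserError   # import location as of this PR

data = "a,b,c\n1,2,3\n4,5,6,7\n8,9,10\n"   # line 3 has one field too many

try:
    pd.read_csv(StringIO(data))
except ParserError as err:
    # e.g. "Error tokenizing data. C error: Expected 3 fields in line 3, saw 4"
    print("parse failed:", err)

# Or skip the offending lines instead of raising (the option available at the time):
df = pd.read_csv(StringIO(data), error_bad_lines=False)
```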
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
@@ -41,6 +41,7 @@ Backwards incompatible API changes
.. _whatsnew_0200.api:


- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv``; the old name is kept as an alias for now and will be removed in the future (:issue:`12665`)



8 changes: 5 additions & 3 deletions pandas/io/common.py
@@ -65,13 +65,15 @@ def urlopen(*args, **kwargs):
_VALID_URLS.discard('')


class CParserError(ValueError):
class ParserError(ValueError):
"""
Exception that is thrown by the C engine when it encounters
a parsing error in `pd.read_csv`
Exception that is raised when an error is encountered in `pd.read_csv`
"""
pass

# gh-12665: Alias for now and remove later.
CParserError = ParserError


class DtypeWarning(Warning):
"""
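As a quick illustration of the backwards compatibility this hunk sets up: since `CParserError` is now just another name bound to the same class, old `except CParserError` blocks keep catching the renamed exception (this mirrors the `test_error_rename` test added further down):

```python
from pandas.io.common import ParserError, CParserError

# The alias is the very same class object, not a subclass.
assert CParserError is ParserError

# Old code catching CParserError still catches the new exception...
try:
    raise ParserError("boom")
except CParserError:
    pass

# ...and code raising the old name is caught by the new one.
try:
    raise CParserError("boom")
except ParserError:
    pass
```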
4 changes: 2 additions & 2 deletions pandas/io/parsers.py
@@ -26,7 +26,7 @@
from pandas.io.date_converters import generic_parser
from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg,
_get_handle, UnicodeReader, UTF8Recoder,
BaseIterator, CParserError, EmptyDataError,
BaseIterator, ParserError, EmptyDataError,
ParserWarning, _NA_VALUES)
from pandas.tseries import tools

@@ -1141,7 +1141,7 @@ def tostr(x):
# long
for n in range(len(columns[0])):
if all(['Unnamed' in tostr(c[n]) for c in columns]):
raise CParserError(
raise ParserError(
"Passed header=[%s] are too many rows for this "
"multi_index of columns"
% ','.join([str(x) for x in self.header])
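To make the condition in the hunk above easier to follow, here is a rough, self-contained restatement of the check in plain Python; the column tuples are made up for illustration and `ParserError` is imported from the location this PR uses:

```python
from pandas.io.common import ParserError

# One tuple per column, one entry per requested header row. The second
# level is all auto-generated "Unnamed: ..." placeholders, which means
# header= asked for more rows than the file really contains.
columns = [("a", "Unnamed: 0_level_1"), ("b", "Unnamed: 1_level_1")]
header = [0, 1]

try:
    for n in range(len(columns[0])):
        if all("Unnamed" in str(c[n]) for c in columns):
            raise ParserError(
                "Passed header=[%s] are too many rows for this "
                "multi_index of columns" % ",".join(str(x) for x in header)
            )
except ParserError as err:
    print(err)  # Passed header=[0,1] are too many rows for this multi_index of columns
```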
2 changes: 1 addition & 1 deletion pandas/io/tests/parser/common.py
@@ -50,7 +50,7 @@ def test_bad_stream_exception(self):
# Issue 13652:
# This test validates that both python engine
# and C engine will raise UnicodeDecodeError instead of
# c engine raising CParserError and swallowing exception
# c engine raising ParserError and swallowing exception
# that caused read to fail.
handle = open(self.csv_shiftjs, "rb")
codec = codecs.lookup("utf-8")
6 changes: 3 additions & 3 deletions pandas/io/tests/parser/test_textreader.py
@@ -154,7 +154,7 @@ def test_skip_bad_lines(self):

reader = TextReader(StringIO(data), delimiter=':',
header=None)
self.assertRaises(parser.CParserError, reader.read)
self.assertRaises(parser.ParserError, reader.read)

reader = TextReader(StringIO(data), delimiter=':',
header=None,
@@ -197,7 +197,7 @@ def test_header_not_enough_lines(self):
assert_array_dicts_equal(expected, recs)

# not enough rows
self.assertRaises(parser.CParserError, TextReader, StringIO(data),
self.assertRaises(parser.ParserError, TextReader, StringIO(data),
delimiter=',', header=5, as_recarray=True)

def test_header_not_enough_lines_as_recarray(self):
@@ -218,7 +218,7 @@ def test_header_not_enough_lines_as_recarray(self):
assert_array_dicts_equal(expected, recs)

# not enough rows
self.assertRaises(parser.CParserError, TextReader, StringIO(data),
self.assertRaises(parser.ParserError, TextReader, StringIO(data),
delimiter=',', header=5, as_recarray=True)

def test_escapechar(self):
10 changes: 5 additions & 5 deletions pandas/io/tests/parser/test_unsupported.py
@@ -15,7 +15,7 @@
import pandas.util.testing as tm

from pandas.compat import StringIO
from pandas.io.common import CParserError
from pandas.io.common import ParserError
from pandas.io.parsers import read_csv, read_table


@@ -78,10 +78,10 @@ def test_c_engine(self):
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""
msg = 'Error tokenizing data'

with tm.assertRaisesRegexp(CParserError, msg):
read_table(StringIO(text), sep=r'\s+')
with tm.assertRaisesRegexp(CParserError, msg):
read_table(StringIO(text), engine='c', sep=r'\s+')
with tm.assertRaisesRegexp(ParserError, msg):
read_table(StringIO(text), sep='\s+')
with tm.assertRaisesRegexp(ParserError, msg):
read_table(StringIO(text), engine='c', sep='\s+')

msg = "Only length-1 thousands markers supported"
data = """A|B|C
18 changes: 18 additions & 0 deletions pandas/io/tests/test_common.py
@@ -11,6 +11,7 @@
from pandas.compat import is_platform_windows, StringIO

from pandas import read_csv, concat
import pandas as pd

try:
from pathlib import Path
@@ -88,6 +89,23 @@ def test_iterator(self):
tm.assert_frame_equal(first, expected.iloc[[0]])
tm.assert_frame_equal(concat(it), expected.iloc[1:])

def test_error_rename(self):
# see gh-12665
try:
raise common.CParserError()
except common.ParserError:
pass

try:
raise common.ParserError()
except common.CParserError:
pass

try:
raise common.ParserError()
except pd.parser.CParserError:
pass


class TestMMapWrapper(tm.TestCase):

4 changes: 2 additions & 2 deletions pandas/io/tests/test_html.py
@@ -23,7 +23,7 @@
is_platform_windows)
from pandas.io.common import URLError, urlopen, file_path_to_url
from pandas.io.html import read_html
from pandas.parser import CParserError
from pandas.parser import ParserError

import pandas.util.testing as tm
from pandas.util.testing import makeCustomDataframe as mkdf, network
@@ -652,7 +652,7 @@ def test_parse_dates_combine(self):

def test_computer_sales_page(self):
data = os.path.join(DATA_PATH, 'computer_sales_page.html')
with tm.assertRaisesRegexp(CParserError, r"Passed header=\[0,1\] are "
with tm.assertRaisesRegexp(ParserError, r"Passed header=\[0,1\] are "
"too many rows for this multi_index "
"of columns"):
self.read_html(data, header=[0, 1])
17 changes: 10 additions & 7 deletions pandas/parser.pyx
@@ -13,8 +13,11 @@ from cpython cimport (PyObject, PyBytes_FromString,
PyUnicode_Check, PyUnicode_AsUTF8String,
PyErr_Occurred, PyErr_Fetch)
from cpython.ref cimport PyObject, Py_XDECREF
from io.common import CParserError, DtypeWarning, EmptyDataError
from io.common import ParserError, DtypeWarning, EmptyDataError

# Import CParserError as alias of ParserError for backwards compatibility.
# Ultimately, we want to remove this import. See gh-12665 and gh-14479.
from io.common import CParserError

cdef extern from "Python.h":
object PyUnicode_FromString(char *v)
@@ -719,7 +722,7 @@ cdef class TextReader:
if isinstance(msg, list):
msg = "[%s], len of %d," % (
','.join([ str(m) for m in msg ]), len(msg))
raise CParserError(
raise ParserError(
'Passed header=%s but only %d lines in file'
% (msg, self.parser.lines))

@@ -812,7 +815,7 @@ cdef class TextReader:
passed_count = len(header[0])

# if passed_count > field_count:
# raise CParserError('Column names have %d fields, '
# raise ParserError('Column names have %d fields, '
# 'data has %d fields'
# % (passed_count, field_count))

@@ -1004,7 +1007,7 @@ cdef class TextReader:
(num_cols >= self.parser.line_fields[i]) * num_cols

if self.table_width - self.leading_cols > num_cols:
raise CParserError(
raise ParserError(
"Too many columns specified: expected %s and found %s" %
(self.table_width - self.leading_cols, num_cols))

@@ -1059,7 +1062,7 @@ cdef class TextReader:
self.use_unsigned)

if col_res is None:
raise CParserError('Unable to parse column %d' % i)
raise ParserError('Unable to parse column %d' % i)

results[i] = col_res

@@ -1310,7 +1313,7 @@ def _is_file_like(obj):
if PY3:
import io
if isinstance(obj, io.TextIOWrapper):
raise CParserError('Cannot handle open unicode files (yet)')
raise ParserError('Cannot handle open unicode files (yet)')

# BufferedReader is a byte reader for Python 3
file = io.BufferedReader
@@ -2015,7 +2018,7 @@ cdef raise_parser_error(object base, parser_t *parser):
else:
message += 'no error message set'

raise CParserError(message)
raise ParserError(message)


def _concatenate_chunks(list chunks):
4 changes: 2 additions & 2 deletions pandas/tests/frame/test_to_csv.py
@@ -8,7 +8,7 @@
import numpy as np

from pandas.compat import (lmap, range, lrange, StringIO, u)
from pandas.parser import CParserError
from pandas.parser import ParserError
from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp,
date_range, read_csv, compat, to_datetime)
import pandas as pd
@@ -589,7 +589,7 @@ def _make_frame(names=None):

for i in [5, 6, 7]:
msg = 'len of {i}, but only 5 lines in file'.format(i=i)
with assertRaisesRegexp(CParserError, msg):
with assertRaisesRegexp(ParserError, msg):
read_csv(path, tupleize_cols=False,
header=lrange(i), index_col=0)
