pandas-dev · gfyoung · Oct 25, 2016
diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt
@@ -35,6 +35,7 @@ Bug Fixes
 
 
 
+- Bug in ``pd.read_csv`` for Python 2.x in which Unicode quote characters were no longer being respected (:issue:`14477`)
 - Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`)
 - Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`)
 

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1759,6 +1759,9 @@ def __init__(self, f, **kwds):
         self.delimiter = kwds['delimiter']
 
         self.quotechar = kwds['quotechar']
+        if isinstance(self.quotechar, compat.text_type):
+            self.quotechar = str(self.quotechar)
+
         self.escapechar = kwds['escapechar']
         self.doublequote = kwds['doublequote']
         self.skipinitialspace = kwds['skipinitialspace']

diff --git a/pandas/io/tests/parser/quoting.py b/pandas/io/tests/parser/quoting.py
@@ -9,7 +9,7 @@
 import pandas.util.testing as tm
 
 from pandas import DataFrame
-from pandas.compat import StringIO
+from pandas.compat import PY3, StringIO, u
 
 
 class QuotingTests(object):
@@ -138,3 +138,16 @@ def test_double_quote(self):
         result = self.read_csv(StringIO(data), quotechar='"',
                                doublequote=False)
         tm.assert_frame_equal(result, expected)
+
+    def test_quotechar_unicode(self):
+        # See gh-14477
+        data = 'a\n1'
+        expected = DataFrame({'a': [1]})
+
+        result = self.read_csv(StringIO(data), quotechar=u('"'))
+        tm.assert_frame_equal(result, expected)
+
+        # Python 2.x does not handle non-ASCII quote characters well.
+        if PY3:
+            result = self.read_csv(StringIO(data), quotechar=u('\u0394'))
+            tm.assert_frame_equal(result, expected)
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -570,7 +570,8 @@ cdef class TextReader:
         if not QUOTE_MINIMAL <= quoting <= QUOTE_NONE:
             raise TypeError('bad "quoting" value')
 
-        if not isinstance(quote_char, (str, bytes)) and quote_char is not None:
+        if not isinstance(quote_char, (str, compat.text_type,
+                                       bytes)) and quote_char is not None:
             dtype = type(quote_char).__name__
             raise TypeError('"quotechar" must be string, '
                             'not {dtype}'.format(dtype=dtype))
Original file line number	Diff line number	Diff line change
Expand Up		@@ -35,6 +35,7 @@ Bug Fixes



		- Bug in ``pd.read_csv`` for Python 2.x in which Unicode quote characters were no longer being respected (:issue:`14477`)
		- Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`)
		- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`)

Expand Down