pandas-dev · jreback · Jul 28, 2016 · Jul 27, 2016
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -788,3 +788,4 @@ Bug Fixes
 - Bugs in ``Index.difference`` and ``DataFrame.join`` raise in Python3 when using mixed-integer indexes (:issue:`13432`, :issue:`12814`)
 
 - Bug in ``.to_excel()`` when DataFrame contains a MultiIndex which contains a label with a NaN value (:issue:`13511`)
+- Bug in ``pd.read_csv`` in Python 2.x with non-UTF8 encoded, multi-character separated data (:issue:`3404`)
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1871,6 +1871,10 @@ class MyDialect(csv.Dialect):
         else:
             def _read():
                 line = f.readline()
+
+                if compat.PY2 and self.encoding:
+                    line = line.decode(self.encoding)
+
                 pat = re.compile(sep)
                 yield pat.split(line.strip())
                 for line in f:

diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py
@@ -201,3 +201,19 @@ def test_skipfooter_with_decimal(self):
         result = self.read_csv(StringIO(data), names=['a'],
                                decimal='#', skipfooter=1)
         tm.assert_frame_equal(result, expected)
+
+    def test_encoding_non_utf8_multichar_sep(self):
+        # see gh-3404
+        expected = DataFrame({'a': [1], 'b': [2]})
+
+        for sep in ['::', '#####', '!!!', '123', '#1!c5',
+                    '%!c!d', '@@#4:2', '_!pd#_']:
+            data = '1' + sep + '2'
+
+            for encoding in ['utf-16', 'utf-16-be', 'utf-16-le',
+                             'utf-32', 'cp037']:
+                encoded_data = data.encode(encoding)
+                result = self.read_csv(BytesIO(encoded_data),
+                                       sep=sep, names=['a', 'b'],
+                                       encoding=encoding)
+                tm.assert_frame_equal(result, expected)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -788,3 +788,4 @@ Bug Fixes
		- Bugs in ``Index.difference`` and ``DataFrame.join`` raise in Python3 when using mixed-integer indexes (:issue:`13432`, :issue:`12814`)

		- Bug in ``.to_excel()`` when DataFrame contains a MultiIndex which contains a label with a NaN value (:issue:`13511`)
		- Bug in ``pd.read_csv`` in Python 2.x with non-UTF8 encoded, multi-character separated data (:issue:`3404`)