pandas-dev · bkandel · Nov 6, 2016 · Nov 6, 2016 · Nov 7, 2016 · Nov 7, 2016
diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt
@@ -29,7 +29,7 @@ Bug Fixes
 
 - Compat with ``dateutil==2.6.0``; segfault reported in the testing suite (:issue:`14621`)
 - Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`)
-
+- Bug in ``pd.read_csv`` where reading a file fails if the number of header rows is equal to the number of lines in the file (:issue:`14515`)
 
 
 

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1509,10 +1509,11 @@ def read(self, nrows=None):
             if self._first_chunk:
                 self._first_chunk = False
                 names = self._maybe_dedup_names(self.orig_names)
-
                 index, columns, col_dict = _get_empty_meta(
                     names, self.index_col, self.index_names,
                     dtype=self.kwds.get('dtype'))
+                columns = self._maybe_make_multi_index_columns(
+                    columns, self.col_names)
 
                 if self.usecols is not None:
                     columns = self._filter_usecols(columns)
@@ -1979,8 +1980,11 @@ def read(self, rows=None):
         if not len(content):  # pragma: no cover
             # DataFrame with the right metadata, even though it's length 0
             names = self._maybe_dedup_names(self.orig_names)
-            return _get_empty_meta(names, self.index_col,
-                                   self.index_names)
+            index, columns, col_dict = _get_empty_meta(
+                names, self.index_col, self.index_names)
+            columns = self._maybe_make_multi_index_columns(
+                columns, self.col_names)
+            return index, columns, col_dict
 
         # handle new style for names in index
         count_empty_content_vals = count_empty_vals(content[0])
@@ -2083,6 +2087,12 @@ def _infer_columns(self):
                     # We have an empty file, so check
                     # if columns are provided. That will
                     # serve as the 'line' for parsing
+                    if have_mi_columns and hr > 0:
+                        if clear_buffer:
+                            self._clear_buffer()
+                        columns.append([None] * len(columns[-1]))
+                        return columns, num_original_columns
+
                     if not self.names:
                         raise EmptyDataError(
                             "No columns to parse from file")

diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
@@ -606,6 +606,28 @@ def test_multi_index_no_level_names(self):
         expected = self.read_csv(StringIO(data), index_col=[1, 0])
         tm.assert_frame_equal(df, expected, check_names=False)
 
+    def test_multi_index_blank_df(self):
+        # GH 14515
+        data = """a,b
+"""
+        df = self.read_csv(StringIO(data), header=[0])
+        expected = DataFrame(columns=['a', 'b'])
+        tm.assert_frame_equal(df, expected)
+        round_trip = self.read_csv(StringIO(
+            expected.to_csv(index=False)), header=[0])
+        tm.assert_frame_equal(round_trip, expected)
+
+        data_multiline = """a,b
+c,d
+"""
+        df2 = self.read_csv(StringIO(data_multiline), header=[0, 1])
+        cols = MultiIndex.from_tuples([('a', 'c'), ('b', 'd')])
+        expected2 = DataFrame(columns=cols)
+        tm.assert_frame_equal(df2, expected2)
+        round_trip = self.read_csv(StringIO(
+            expected2.to_csv(index=False)), header=[0, 1])
+        tm.assert_frame_equal(round_trip, expected2)
+
     def test_no_unnamed_index(self):
         data = """ id c0 c1 c2
 0 1 0 a b

diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -717,7 +717,9 @@ cdef class TextReader:
                     start = self.parser.line_start[0]
 
                 # e.g., if header=3 and file only has 2 lines
-                elif self.parser.lines < hr + 1:
+                elif (self.parser.lines < hr + 1
+                      and not isinstance(self.orig_header, list)) or (
+                          self.parser.lines < hr):
                     msg = self.orig_header
                     if isinstance(msg, list):
                         msg = "[%s], len of %d," % (
@@ -940,7 +942,7 @@ cdef class TextReader:
                 raise_parser_error('Error tokenizing data', self.parser)
             footer = self.skipfooter
 
-        if self.parser_start == self.parser.lines:
+        if self.parser_start >= self.parser.lines:
             raise StopIteration
         self._end_clock('Tokenization')
 

diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -587,7 +587,7 @@ def _make_frame(names=None):
             df = _make_frame(True)
             df.to_csv(path, tupleize_cols=False)
 
-            for i in [5, 6, 7]:
+            for i in [6, 7]:
                 msg = 'len of {i}, but only 5 lines in file'.format(i=i)
                 with assertRaisesRegexp(ParserError, msg):
                     read_csv(path, tupleize_cols=False,
Original file line number	Diff line number	Diff line change
Expand Up		@@ -29,7 +29,7 @@ Bug Fixes

		- Compat with ``dateutil==2.6.0``; segfault reported in the testing suite (:issue:`14621`)
		- Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`)

		- Bug in ``pd.read_csv`` where reading a file fails if the number of header rows is equal to the number of lines in the file (:issue:`14515`)



Expand Down