BUG: read_csv may interpret second row as index names even if index_col is False (#47397)

phofl · web-flow · commit fd9b2a4083cd · 2022-06-21T11:45:41.000-07:00
* BUG: read_csv may interpret second row as index names even if header is integer

* BUG: read_csv may interpret second row as index names even if index_col is False

* BUG: read_csv may interpret second row as index names even if index_col is False
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -861,6 +861,7 @@ I/O
 - Bug in :func:`read_csv` not recognizing line break for ``on_bad_lines="warn"`` for ``engine="c"`` (:issue:`41710`)
 - Bug in :meth:`DataFrame.to_csv` not respecting ``float_format`` for ``Float64`` dtype (:issue:`45991`)
 - Bug in :func:`read_csv` not respecting a specified converter to index columns in all cases (:issue:`40589`)
+- Bug in :func:`read_csv` with ``engine="python"`` interpreting the second row as :class:`Index` names even when ``index_col=False`` (:issue:`46569`)
 - Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`)
 - Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` would raise when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`)
 - Bug in :func:`read_html` where elements surrounding ``<br>`` were joined without a space between them (:issue:`29528`)
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -933,7 +933,11 @@ def _get_index_name(
                 implicit_first_cols = len(line) - self.num_original_columns
 
             # Case 0
-            if next_line is not None and self.header is not None:
+            if (
+                next_line is not None
+                and self.header is not None
+                and index_col is not False
+            ):
                 if len(next_line) == len(line) + self.num_original_columns:
                     # column and index names on diff rows
                     self.index_col = list(range(len(line)))
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
@@ -466,6 +466,17 @@ def test_index_col_false_and_header_none(python_parser_only):
 0.5,0.03
 0.1,0.2,0.3,2
 """
-    result = parser.read_csv(StringIO(data), sep=",", header=None, index_col=False)
+    with tm.assert_produces_warning(ParserWarning, match="Length of header"):
+        result = parser.read_csv(StringIO(data), sep=",", header=None, index_col=False)
     expected = DataFrame({0: [0.5, 0.1], 1: [0.03, 0.2]})
     tm.assert_frame_equal(result, expected)
+
+
+def test_header_int_do_not_infer_multiindex_names_on_different_line(python_parser_only):
+    # GH#46569
+    parser = python_parser_only
+    data = StringIO("a\na,b\nc,d,e\nf,g,h")
+    with tm.assert_produces_warning(ParserWarning, match="Length of header"):
+        result = parser.read_csv(data, engine="python", index_col=False)
+    expected = DataFrame({"a": ["a", "c", "f"]})
+    tm.assert_frame_equal(result, expected)