Parametrize the multiindex clipboard test and fix the default ``index_col`` passed by ``read_clipboard``

saucoide · saucoide · commit 1603426d89ab · 2021-05-22T19:01:43.000+02:00
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -925,7 +925,7 @@ I/O
 - Bug in :func:`read_csv` and :func:`read_table` misinterpreting arguments when ``sys.setprofile`` had been previously called (:issue:`41069`)
 - Bug in the conversion from pyarrow to pandas (e.g. for reading Parquet) with nullable dtypes and a pyarrow array whose data buffer size is not a multiple of dtype size (:issue:`40896`)
 - Bug in :func:`read_excel` would raise an error when pandas could not determine the file type, even when user specified the ``engine`` argument (:issue:`41225`)
-- Bug in :func:`read_clipboard` when copying from excel and the first column contains null values (:issue:`41108`)
+- Bug in :func:`read_clipboard` where copying from an Excel file shifted values into the wrong column if there were null values in the first column (:issue:`41108`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py
@@ -65,7 +65,7 @@ def read_clipboard(sep=r"\s+", **kwargs):  # pragma: no cover
         # to account for index columns
         index_length = len(lines[0]) - len(lines[0].lstrip(" \t"))
         if index_length != 0:
-            kwargs.setdefault("index_col", [0, index_length - 1])
+            kwargs.setdefault("index_col", list(range(index_length)))
 
     # Edge case where sep is specified to be None, return to default
     if sep is None and kwargs.get("delim_whitespace") is None:
diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py
@@ -5,7 +5,6 @@
 
 from pandas import (
     DataFrame,
-    MultiIndex,
     get_option,
     read_clipboard,
 )
@@ -257,16 +256,34 @@ def test_infer_excel_with_nulls(self, request, mock_clipboard):
         # excel data is parsed correctly
         tm.assert_frame_equal(df, df_expected)
 
-    def test_infer_excel_with_multiindex(self, request, mock_clipboard):
+    @pytest.mark.parametrize(
+        "multiindex",
+        [
+            (
+                """\t\t\tcol1\tcol2
+                A\t0\tTrue\t1\tred
+                A\t1\tTrue\t\tblue
+                B\t0\tFalse\t2\tgreen""",
+                [["A", "A", "B"], [0, 1, 0], [True, True, False]],
+            ),
+            (
+                """\t\tcol1\tcol2
+                A\t0\t1\tred
+                A\t1\t\tblue
+                B\t0\t2\tgreen""",
+                [["A", "A", "B"], [0, 1, 0]],
+            ),
+        ],
+    )
+    def test_infer_excel_with_multiindex(self, request, mock_clipboard, multiindex):
         # GH41108
-        text = "\t\tcol1\tcol2\nA\t0\t1\tred\nA\t1\t\tblue\nB\t0\t2\tgreen"
 
-        mock_clipboard[request.node.name] = text
+        # the `.replace()` is because `.dedent()` does not like the leading `\t`
+        mock_clipboard[request.node.name] = multiindex[0].replace(" ", "")
         df = read_clipboard()
-        multiindex = MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0)])
         df_expected = DataFrame(
             data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]},
-            index=multiindex,
+            index=multiindex[1],
         )
 
         # excel data is parsed correctly