TST/CLN: break up & parametrize tests for df.set_index

h-vetinari · h-vetinari · commit 824e96bf1ec6 · 2018-08-23T17:21:57.000+02:00
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3862,10 +3862,29 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         -------
         dataframe : DataFrame
         """
-        inplace = validate_bool_kwarg(inplace, 'inplace')
+        from pandas import Series
+
         if not isinstance(keys, list):
             keys = [keys]
 
+        # collect the entries of "keys" that are labels rather than arrays
+        col_labels = [x for x in keys
+                      if not isinstance(x, (Series, Index, MultiIndex,
+                                            list, np.ndarray))]
+        if any(x not in self for x in col_labels):
+            # if there are any labels that are invalid, we raise a KeyError
+            missing = [x for x in col_labels if x not in self]
+            raise KeyError('{}'.format(missing))
+
+        elif len(set(col_labels)) < len(col_labels):
+            # if all are valid labels, but there are duplicates
+            dup = Series(col_labels)
+            dup = list(dup.loc[dup.duplicated()])
+            raise ValueError('Passed duplicate column names '
+                             'to keys: {dup}'.format(dup=dup))
+
+        inplace = validate_bool_kwarg(inplace, 'inplace')
+
         if inplace:
             frame = self
         else:
diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py
@@ -0,0 +1,121 @@
+import pytest
+
+import numpy as np
+
+from pandas import compat
+import pandas.util.testing as tm
+from pandas import DataFrame, date_range, NaT
+
+
+@pytest.fixture
+def frame():  # wraps tm.getSeriesData(); column order left to DataFrame
+    return DataFrame(tm.getSeriesData())
+
+
+@pytest.fixture
+def frame2():  # tm.getSeriesData() with columns requested as D, C, B, A
+    return DataFrame(tm.getSeriesData(), columns=['D', 'C', 'B', 'A'])
+
+
+@pytest.fixture
+def intframe():
+    """Int-cast series data, forced to int64 to avoid platform testing issues."""
+    raw = compat.iteritems(tm.getSeriesData())
+    as_int = DataFrame({name: ser.astype(int) for name, ser in raw})
+    return DataFrame(dict(compat.iteritems(as_int)), dtype=np.int64)
+
+
+@pytest.fixture
+def tsframe():  # wraps tm.getTimeSeriesData(); presumably datetime-indexed
+    return DataFrame(tm.getTimeSeriesData())
+
+
+@pytest.fixture
+def mixed_frame():
+    """Series-data frame with an extra constant string column ``foo``."""
+    base = DataFrame(tm.getSeriesData())
+    return base.assign(foo='bar')
+
+
+@pytest.fixture
+def mixed_float():
+    """Series-data frame with A, B, C recast to float16, float32, float64."""
+    mixed = DataFrame(tm.getSeriesData())
+    for col, dtype in [('A', 'float16'), ('B', 'float32'), ('C', 'float64')]:
+        mixed[col] = mixed[col].astype(dtype)
+    return mixed
+
+
+@pytest.fixture
+def mixed_float2():
+    """Series-data frame with D, C, B recast to float16, float32, float64."""
+    out = DataFrame(tm.getSeriesData())
+    for name, kind in zip('DCB', ('float16', 'float32', 'float64')):
+        out[name] = out[name].astype(kind)
+    return out
+
+
+@pytest.fixture
+def mixed_int():
+    """Int-cast frame: A uint8, B int32, C int64, D all-ones uint64."""
+    series = compat.iteritems(tm.getSeriesData())
+    ints = DataFrame({key: val.astype(int) for key, val in series})
+    for col, dtype in [('A', 'uint8'), ('B', 'int32'), ('C', 'int64')]:
+        ints[col] = ints[col].astype(dtype)
+    ints['D'] = np.ones(len(ints), dtype='uint64')
+    return ints
+
+
+@pytest.fixture
+def all_mixed():
+    """Ten-row frame: scalar float/int/str plus float32/int32 arrays."""
+    cols = {'a': 1., 'b': 2, 'c': 'foo', 'float32': np.ones(10, 'float32'),
+            'int32': np.ones(10, 'int32')}
+    return DataFrame(cols, index=np.arange(10))
+
+
+@pytest.fixture
+def tzframe():
+    """3-row frame: A naive, B US/Eastern, C CET; row 1 of B and C is NaT."""
+    start, n = '20130101', 3
+    stamps = DataFrame({'A': date_range(start, periods=n),
+                        'B': date_range(start, periods=n, tz='US/Eastern'),
+                        'C': date_range(start, periods=n, tz='CET')})
+    for pos in (1, 2):
+        stamps.iloc[1, pos] = NaT
+    return stamps
+
+
+@pytest.fixture
+def empty():  # frame built from an empty dict
+    return DataFrame({})
+
+
+@pytest.fixture
+def ts1():  # series from tm.makeTimeSeries with nper=30
+    return tm.makeTimeSeries(nper=30)
+
+
+@pytest.fixture
+def ts2():  # fresh nper=30 series with the first five entries sliced off
+    return tm.makeTimeSeries(nper=30)[5:]
+
+
+@pytest.fixture
+def simple():
+    """3x3 float frame of 1.0-9.0 in row-major order, rows labelled a-c."""
+    values = np.arange(1., 10.).reshape(3, 3)
+    row_labels = list('abc')
+    col_labels = ['one', 'two', 'three']
+
+    return DataFrame(values, columns=col_labels, index=row_labels)
+
+
+@pytest.fixture
+def frame_of_index_cols():
+    """Five-row frame: string columns A-C, random normal columns D and E."""
+    return DataFrame({'A': ['foo'] * 3 + ['bar'] * 2,
+                      'B': ['one', 'two', 'three', 'one', 'two'],
+                      'C': list('abcde'),
+                      'D': np.random.randn(5),
+                      'E': np.random.randn(5)})
diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py