TST: More pytest.mark.parametrize (#45115)

mroeschke · web-flow · commit de82ced9d6a4 · 2021-12-30T09:08:56.000-05:00
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -775,16 +775,6 @@ def test_constructor_dict_of_generators(self):
         tm.assert_frame_equal(result, expected)
 
     def test_constructor_dict_multiindex(self):
-        def check(result, expected):
-            return tm.assert_frame_equal(
-                result,
-                expected,
-                check_dtype=True,
-                check_index_type=True,
-                check_column_type=True,
-                check_names=True,
-            )
-
         d = {
             ("a", "a"): {("i", "i"): 0, ("i", "j"): 1, ("j", "i"): 2},
             ("b", "a"): {("i", "i"): 6, ("i", "j"): 5, ("j", "i"): 4},
@@ -796,7 +786,10 @@ def check(result, expected):
             [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d])
         ).T
         expected.index = MultiIndex.from_tuples(expected.index)
-        check(df, expected)
+        tm.assert_frame_equal(
+            df,
+            expected,
+        )
 
         d["z"] = {"y": 123.0, ("i", "i"): 111, ("i", "j"): 111, ("j", "i"): 111}
         _d.insert(0, ("z", d["z"]))
@@ -806,7 +799,7 @@ def check(result, expected):
         expected.index = Index(expected.index, tupleize_cols=False)
         df = DataFrame(d)
         df = df.reindex(columns=expected.columns, index=expected.index)
-        check(df, expected)
+        tm.assert_frame_equal(df, expected)
 
     def test_constructor_dict_datetime64_index(self):
         # GH 10160
@@ -2167,44 +2160,38 @@ def test_constructor_series_copy(self, float_frame):
 
         assert not (series["A"] == 5).all()
 
-    def test_constructor_with_nas(self):
+    @pytest.mark.parametrize(
+        "df",
+        [
+            DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan]),
+            DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1.1, 2.2, np.nan]),
+            DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan]),
+            DataFrame(
+                [[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan]
+            ),
+            DataFrame([[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1, 2, 2]),
+        ],
+    )
+    def test_constructor_with_nas(self, df):
         # GH 5016
         # na's in indices
+        # GH 21428 (non-unique columns)
 
-        def check(df):
-            for i in range(len(df.columns)):
-                df.iloc[:, i]
-
-            indexer = np.arange(len(df.columns))[isna(df.columns)]
-
-            # No NaN found -> error
-            if len(indexer) == 0:
-                with pytest.raises(KeyError, match="^nan$"):
-                    df.loc[:, np.nan]
-            # single nan should result in Series
-            elif len(indexer) == 1:
-                tm.assert_series_equal(df.iloc[:, indexer[0]], df.loc[:, np.nan])
-            # multiple nans should result in DataFrame
-            else:
-                tm.assert_frame_equal(df.iloc[:, indexer], df.loc[:, np.nan])
-
-        df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan])
-        check(df)
-
-        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1.1, 2.2, np.nan])
-        check(df)
-
-        df = DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan])
-        check(df)
+        for i in range(len(df.columns)):
+            df.iloc[:, i]
 
-        df = DataFrame(
-            [[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan]
-        )
-        check(df)
+        indexer = np.arange(len(df.columns))[isna(df.columns)]
 
-        # GH 21428 (non-unique columns)
-        df = DataFrame([[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1, 2, 2])
-        check(df)
+        # No NaN found -> error
+        if len(indexer) == 0:
+            with pytest.raises(KeyError, match="^nan$"):
+                df.loc[:, np.nan]
+        # single nan should result in Series
+        elif len(indexer) == 1:
+            tm.assert_series_equal(df.iloc[:, indexer[0]], df.loc[:, np.nan])
+        # multiple nans should result in DataFrame
+        else:
+            tm.assert_frame_equal(df.iloc[:, indexer], df.loc[:, np.nan])
 
     def test_constructor_lists_to_object_dtype(self):
         # from #1074
diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py
@@ -130,35 +130,32 @@ class TestGroupVarFloat32(GroupVarTestMixin):
     rtol = 1e-2
 
 
-def test_group_ohlc():
-    def _check(dtype):
-        obj = np.array(np.random.randn(20), dtype=dtype)
+@pytest.mark.parametrize("dtype", ["float32", "float64"])
+def test_group_ohlc(dtype):
+    obj = np.array(np.random.randn(20), dtype=dtype)
 
-        bins = np.array([6, 12, 20])
-        out = np.zeros((3, 4), dtype)
-        counts = np.zeros(len(out), dtype=np.int64)
-        labels = ensure_platform_int(np.repeat(np.arange(3), np.diff(np.r_[0, bins])))
+    bins = np.array([6, 12, 20])
+    out = np.zeros((3, 4), dtype)
+    counts = np.zeros(len(out), dtype=np.int64)
+    labels = ensure_platform_int(np.repeat(np.arange(3), np.diff(np.r_[0, bins])))
 
-        func = libgroupby.group_ohlc
-        func(out, counts, obj[:, None], labels)
+    func = libgroupby.group_ohlc
+    func(out, counts, obj[:, None], labels)
 
-        def _ohlc(group):
-            if isna(group).all():
-                return np.repeat(np.nan, 4)
-            return [group[0], group.max(), group.min(), group[-1]]
+    def _ohlc(group):
+        if isna(group).all():
+            return np.repeat(np.nan, 4)
+        return [group[0], group.max(), group.min(), group[-1]]
 
-        expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])])
+    expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])])
 
-        tm.assert_almost_equal(out, expected)
-        tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64))
+    tm.assert_almost_equal(out, expected)
+    tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64))
 
-        obj[:6] = np.nan
-        func(out, counts, obj[:, None], labels)
-        expected[0] = np.nan
-        tm.assert_almost_equal(out, expected)
-
-    _check("float32")
-    _check("float64")
+    obj[:6] = np.nan
+    func(out, counts, obj[:, None], labels)
+    expected[0] = np.nan
+    tm.assert_almost_equal(out, expected)
 
 
 def _check_cython_group_transform_cumulative(pd_op, np_op, dtype):
diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py
@@ -178,49 +178,44 @@ def test_has_duplicates_from_tuples():
     assert not mi.has_duplicates
 
 
-def test_has_duplicates_overflow():
+@pytest.mark.parametrize("nlevels", [4, 8])
+@pytest.mark.parametrize("with_nulls", [True, False])
+def test_has_duplicates_overflow(nlevels, with_nulls):
     # handle int64 overflow if possible
-    def check(nlevels, with_nulls):
-        codes = np.tile(np.arange(500), 2)
-        level = np.arange(500)
+    # no overflow with 4
+    # overflow possible with 8
+    codes = np.tile(np.arange(500), 2)
+    level = np.arange(500)
 
-        if with_nulls:  # inject some null values
-            codes[500] = -1  # common nan value
-            codes = [codes.copy() for i in range(nlevels)]
-            for i in range(nlevels):
-                codes[i][500 + i - nlevels // 2] = -1
+    if with_nulls:  # inject some null values
+        codes[500] = -1  # common nan value
+        codes = [codes.copy() for i in range(nlevels)]
+        for i in range(nlevels):
+            codes[i][500 + i - nlevels // 2] = -1
 
-            codes += [np.array([-1, 1]).repeat(500)]
-        else:
-            codes = [codes] * nlevels + [np.arange(2).repeat(500)]
+        codes += [np.array([-1, 1]).repeat(500)]
+    else:
+        codes = [codes] * nlevels + [np.arange(2).repeat(500)]
 
-        levels = [level] * nlevels + [[0, 1]]
+    levels = [level] * nlevels + [[0, 1]]
 
-        # no dups
-        mi = MultiIndex(levels=levels, codes=codes)
-        assert not mi.has_duplicates
-
-        # with a dup
-        if with_nulls:
-
-            def f(a):
-                return np.insert(a, 1000, a[0])
+    # no dups
+    mi = MultiIndex(levels=levels, codes=codes)
+    assert not mi.has_duplicates
 
-            codes = list(map(f, codes))
-            mi = MultiIndex(levels=levels, codes=codes)
-        else:
-            values = mi.values.tolist()
-            mi = MultiIndex.from_tuples(values + [values[0]])
+    # with a dup
+    if with_nulls:
 
-        assert mi.has_duplicates
+        def f(a):
+            return np.insert(a, 1000, a[0])
 
-    # no overflow
-    check(4, False)
-    check(4, True)
+        codes = list(map(f, codes))
+        mi = MultiIndex(levels=levels, codes=codes)
+    else:
+        values = mi.values.tolist()
+        mi = MultiIndex.from_tuples(values + [values[0]])
 
-    # overflow possible
-    check(8, False)
-    check(8, True)
+    assert mi.has_duplicates
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py
@@ -670,23 +670,24 @@ def test_append_misc(setup_path):
         result = store.select("df1")
         tm.assert_frame_equal(result, df)
 
-    # more chunksize in append tests
-    def check(obj, comparator):
-        for c in [10, 200, 1000]:
-            with ensure_clean_store(setup_path, mode="w") as store:
-                store.append("obj", obj, chunksize=c)
-                result = store.select("obj")
-                comparator(result, obj)
 
+@pytest.mark.parametrize("chunksize", [10, 200, 1000])
+def test_append_misc_chunksize(setup_path, chunksize):
+    # more chunksize in append tests
     df = tm.makeDataFrame()
     df["string"] = "foo"
     df["float322"] = 1.0
     df["float322"] = df["float322"].astype("float32")
     df["bool"] = df["float322"] > 0
     df["time1"] = Timestamp("20130101")
     df["time2"] = Timestamp("20130102")
-    check(df, tm.assert_frame_equal)
+    with ensure_clean_store(setup_path, mode="w") as store:
+        store.append("obj", df, chunksize=chunksize)
+        result = store.select("obj")
+        tm.assert_frame_equal(result, df)
+
 
+def test_append_misc_empty_frame(setup_path):
     # empty frame, GH4273
     with ensure_clean_store(setup_path) as store:
 
diff --git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py
@@ -29,71 +29,64 @@
 pytestmark = pytest.mark.single
 
 
-def test_mode(setup_path):
+@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
+def test_mode(setup_path, mode):
 
     df = tm.makeTimeDataFrame()
+    msg = r"[\S]* does not exist"
+    with ensure_clean_path(setup_path) as path:
 
-    def check(mode):
-
-        msg = r"[\S]* does not exist"
-        with ensure_clean_path(setup_path) as path:
-
-            # constructor
-            if mode in ["r", "r+"]:
-                with pytest.raises(OSError, match=msg):
-                    HDFStore(path, mode=mode)
+        # constructor
+        if mode in ["r", "r+"]:
+            with pytest.raises(OSError, match=msg):
+                HDFStore(path, mode=mode)
 
-            else:
-                store = HDFStore(path, mode=mode)
-                assert store._handle.mode == mode
-                store.close()
+        else:
+            store = HDFStore(path, mode=mode)
+            assert store._handle.mode == mode
+            store.close()
 
-        with ensure_clean_path(setup_path) as path:
+    with ensure_clean_path(setup_path) as path:
 
-            # context
-            if mode in ["r", "r+"]:
-                with pytest.raises(OSError, match=msg):
-                    with HDFStore(path, mode=mode) as store:
-                        pass
-            else:
+        # context
+        if mode in ["r", "r+"]:
+            with pytest.raises(OSError, match=msg):
                 with HDFStore(path, mode=mode) as store:
-                    assert store._handle.mode == mode
+                    pass
+        else:
+            with HDFStore(path, mode=mode) as store:
+                assert store._handle.mode == mode
 
-        with ensure_clean_path(setup_path) as path:
+    with ensure_clean_path(setup_path) as path:
 
-            # conv write
-            if mode in ["r", "r+"]:
-                with pytest.raises(OSError, match=msg):
-                    df.to_hdf(path, "df", mode=mode)
-                df.to_hdf(path, "df", mode="w")
-            else:
+        # conv write
+        if mode in ["r", "r+"]:
+            with pytest.raises(OSError, match=msg):
                 df.to_hdf(path, "df", mode=mode)
-
-            # conv read
-            if mode in ["w"]:
-                msg = (
-                    "mode w is not allowed while performing a read. "
-                    r"Allowed modes are r, r\+ and a."
-                )
-                with pytest.raises(ValueError, match=msg):
-                    read_hdf(path, "df", mode=mode)
-            else:
-                result = read_hdf(path, "df", mode=mode)
-                tm.assert_frame_equal(result, df)
-
-    def check_default_mode():
-
-        # read_hdf uses default mode
-        with ensure_clean_path(setup_path) as path:
             df.to_hdf(path, "df", mode="w")
-            result = read_hdf(path, "df")
+        else:
+            df.to_hdf(path, "df", mode=mode)
+
+        # conv read
+        if mode in ["w"]:
+            msg = (
+                "mode w is not allowed while performing a read. "
+                r"Allowed modes are r, r\+ and a."
+            )
+            with pytest.raises(ValueError, match=msg):
+                read_hdf(path, "df", mode=mode)
+        else:
+            result = read_hdf(path, "df", mode=mode)
             tm.assert_frame_equal(result, df)
 
-    check("r")
-    check("r+")
-    check("a")
-    check("w")
-    check_default_mode()
+
+def test_default_mode(setup_path):
+    # read_hdf uses default mode
+    df = tm.makeTimeDataFrame()
+    with ensure_clean_path(setup_path) as path:
+        df.to_hdf(path, "df", mode="w")
+        result = read_hdf(path, "df")
+        tm.assert_frame_equal(result, df)
 
 
 def test_reopen_handle(setup_path):