Skip to content

TST: Split / parameterize reshaping tests #45278

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 20 additions & 24 deletions pandas/tests/reshape/test_melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ def test_value_vars(self):
)
tm.assert_frame_equal(result4, expected4)

def test_value_vars_types(self):
@pytest.mark.parametrize("type_", (tuple, list, np.array))
def test_value_vars_types(self, type_):
# GH 15348
expected = DataFrame(
{
Expand All @@ -86,10 +87,8 @@ def test_value_vars_types(self):
},
columns=["id1", "id2", "variable", "value"],
)

for type_ in (tuple, list, np.array):
result = self.df.melt(id_vars=["id1", "id2"], value_vars=type_(("A", "B")))
tm.assert_frame_equal(result, expected)
result = self.df.melt(id_vars=["id1", "id2"], value_vars=type_(("A", "B")))
tm.assert_frame_equal(result, expected)

def test_vars_work_with_multiindex(self):
expected = DataFrame(
Expand Down Expand Up @@ -140,23 +139,21 @@ def test_single_vars_work_with_multiindex(
result = self.df1.melt(id_vars, value_vars, col_level=col_level)
tm.assert_frame_equal(result, expected)

def test_tuple_vars_fail_with_multiindex(self):
@pytest.mark.parametrize(
"id_vars, value_vars",
[
[("A", "a"), [("B", "b")]],
[[("A", "a")], ("B", "b")],
[("A", "a"), ("B", "b")],
],
)
def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars):
# melt should fail with an informative error message if
# the columns have a MultiIndex and a tuple is passed
# for id_vars or value_vars.
tuple_a = ("A", "a")
list_a = [tuple_a]
tuple_b = ("B", "b")
list_b = [tuple_b]

msg = r"(id|value)_vars must be a list of tuples when columns are a MultiIndex"
for id_vars, value_vars in (
(tuple_a, list_b),
(list_a, tuple_b),
(tuple_a, tuple_b),
):
with pytest.raises(ValueError, match=msg):
self.df1.melt(id_vars=id_vars, value_vars=value_vars)
with pytest.raises(ValueError, match=msg):
self.df1.melt(id_vars=id_vars, value_vars=value_vars)

def test_custom_var_name(self):
result5 = self.df.melt(var_name=self.var_name)
Expand Down Expand Up @@ -261,11 +258,10 @@ def test_custom_var_and_value_name(self):
result20 = df20.melt()
assert result20.columns.tolist() == ["foo", "value"]

def test_col_level(self):
res1 = self.df1.melt(col_level=0)
res2 = self.df1.melt(col_level="CAP")
assert res1.columns.tolist() == ["CAP", "value"]
assert res2.columns.tolist() == ["CAP", "value"]
@pytest.mark.parametrize("col_level", [0, "CAP"])
def test_col_level(self, col_level):
res = self.df1.melt(col_level=col_level)
assert res.columns.tolist() == ["CAP", "value"]

def test_multiindex(self):
res = self.df1.melt()
Expand Down Expand Up @@ -633,7 +629,7 @@ def test_pairs(self):
tm.assert_frame_equal(result, exp)

with tm.assert_produces_warning(FutureWarning):
result = lreshape(df, spec, dropna=False, label="foo")
lreshape(df, spec, dropna=False, label="foo")

spec = {
"visitdt": [f"visitdt{i:d}" for i in range(1, 3)],
Expand Down
81 changes: 42 additions & 39 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ def test_pivot_with_non_observable_dropna(self, dropna):

tm.assert_frame_equal(result, expected)

def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
# gh-21378
df = DataFrame(
{
Expand Down Expand Up @@ -493,6 +494,8 @@ def test_pivot_index_with_nan(self, method):
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(df.pivot("b", "a", "c"), expected.T)

@pytest.mark.parametrize("method", [True, False])
def test_pivot_index_with_nan_dates(self, method):
# GH9491
df = DataFrame(
{
Expand All @@ -501,8 +504,8 @@ def test_pivot_index_with_nan(self, method):
}
)
df["b"] = df["a"] - pd.Timestamp("2014-02-02")
df.loc[1, "a"] = df.loc[3, "a"] = nan
df.loc[1, "b"] = df.loc[4, "b"] = nan
df.loc[1, "a"] = df.loc[3, "a"] = np.nan
df.loc[1, "b"] = df.loc[4, "b"] = np.nan

if method:
pv = df.pivot("a", "b", "c")
Expand Down Expand Up @@ -851,33 +854,31 @@ def test_pivot_with_tuple_of_values(self, method):
else:
pd.pivot(df, index="zoo", columns="foo", values=("bar", "baz"))

def test_margins(self):
def _check_output(
result, values_col, index=["A", "B"], columns=["C"], margins_col="All"
):
col_margins = result.loc[result.index[:-1], margins_col]
expected_col_margins = self.data.groupby(index)[values_col].mean()
tm.assert_series_equal(col_margins, expected_col_margins, check_names=False)
assert col_margins.name == margins_col

result = result.sort_index()
index_margins = result.loc[(margins_col, "")].iloc[:-1]

expected_ix_margins = self.data.groupby(columns)[values_col].mean()
tm.assert_series_equal(
index_margins, expected_ix_margins, check_names=False
)
assert index_margins.name == (margins_col, "")
def _check_output(
self, result, values_col, index=["A", "B"], columns=["C"], margins_col="All"
):
col_margins = result.loc[result.index[:-1], margins_col]
expected_col_margins = self.data.groupby(index)[values_col].mean()
tm.assert_series_equal(col_margins, expected_col_margins, check_names=False)
assert col_margins.name == margins_col

grand_total_margins = result.loc[(margins_col, ""), margins_col]
expected_total_margins = self.data[values_col].mean()
assert grand_total_margins == expected_total_margins
result = result.sort_index()
index_margins = result.loc[(margins_col, "")].iloc[:-1]

expected_ix_margins = self.data.groupby(columns)[values_col].mean()
tm.assert_series_equal(index_margins, expected_ix_margins, check_names=False)
assert index_margins.name == (margins_col, "")

grand_total_margins = result.loc[(margins_col, ""), margins_col]
expected_total_margins = self.data[values_col].mean()
assert grand_total_margins == expected_total_margins

def test_margins(self):
# column specified
result = self.data.pivot_table(
values="D", index=["A", "B"], columns="C", margins=True, aggfunc=np.mean
)
_check_output(result, "D")
self._check_output(result, "D")

# Set a different margins_name (not 'All')
result = self.data.pivot_table(
Expand All @@ -888,15 +889,16 @@ def _check_output(
aggfunc=np.mean,
margins_name="Totals",
)
_check_output(result, "D", margins_col="Totals")
self._check_output(result, "D", margins_col="Totals")

# no column specified
table = self.data.pivot_table(
index=["A", "B"], columns="C", margins=True, aggfunc=np.mean
)
for value_col in table.columns.levels[0]:
_check_output(table[value_col], value_col)
self._check_output(table[value_col], value_col)

def test_no_col(self):
# no col

# to help with a buglet
Expand Down Expand Up @@ -1353,6 +1355,7 @@ def test_pivot_timegrouper(self, using_array_manager):
aggfunc=np.sum,
)

def test_pivot_timegrouper_double(self):
# double grouper
df = DataFrame(
{
Expand Down Expand Up @@ -1633,7 +1636,8 @@ def test_pivot_dtaccessor(self):
)
tm.assert_frame_equal(result, expected)

def test_daily(self):
@pytest.mark.parametrize("i", range(1, 367))
def test_daily(self, i):
rng = date_range("1/1/2000", "12/31/2004", freq="D")
ts = Series(np.random.randn(len(rng)), index=rng)

Expand All @@ -1644,28 +1648,27 @@ def test_daily(self):

doy = np.asarray(ts.index.dayofyear)

for i in range(1, 367):
subset = ts[doy == i]
subset.index = subset.index.year
subset = ts[doy == i]
subset.index = subset.index.year

result = annual[i].dropna()
tm.assert_series_equal(result, subset, check_names=False)
assert result.name == i
result = annual[i].dropna()
tm.assert_series_equal(result, subset, check_names=False)
assert result.name == i

def test_monthly(self):
@pytest.mark.parametrize("i", range(1, 13))
def test_monthly(self, i):
rng = date_range("1/1/2000", "12/31/2004", freq="M")
ts = Series(np.random.randn(len(rng)), index=rng)

annual = pivot_table(DataFrame(ts), index=ts.index.year, columns=ts.index.month)
annual.columns = annual.columns.droplevel(0)

month = ts.index.month
for i in range(1, 13):
subset = ts[month == i]
subset.index = subset.index.year
result = annual[i].dropna()
tm.assert_series_equal(result, subset, check_names=False)
assert result.name == i
subset = ts[month == i]
subset.index = subset.index.year
result = annual[i].dropna()
tm.assert_series_equal(result, subset, check_names=False)
assert result.name == i

def test_pivot_table_with_iterator_values(self):
# GH 12017
Expand Down
43 changes: 26 additions & 17 deletions pandas/tests/reshape/test_union_categoricals.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@


class TestUnionCategoricals:
def test_union_categorical(self):
# GH 13361
data = [
@pytest.mark.parametrize(
"a, b, combined",
[
(list("abc"), list("abd"), list("abcabd")),
([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]),
([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]),
Expand All @@ -39,14 +39,16 @@ def test_union_categorical(self):
pd.period_range("2014-01-06", "2014-01-07"),
pd.period_range("2014-01-01", "2014-01-07"),
),
]

for a, b, combined in data:
for box in [Categorical, CategoricalIndex, Series]:
result = union_categoricals([box(Categorical(a)), box(Categorical(b))])
expected = Categorical(combined)
tm.assert_categorical_equal(result, expected)
],
)
@pytest.mark.parametrize("box", [Categorical, CategoricalIndex, Series])
def test_union_categorical(self, a, b, combined, box):
# GH 13361
result = union_categoricals([box(Categorical(a)), box(Categorical(b))])
expected = Categorical(combined)
tm.assert_categorical_equal(result, expected)

def test_union_categorical_ordered_appearance(self):
# new categories ordered by appearance
s = Categorical(["x", "y", "z"])
s2 = Categorical(["a", "b", "c"])
Expand All @@ -56,19 +58,22 @@ def test_union_categorical(self):
)
tm.assert_categorical_equal(result, expected)

def test_union_categorical_ordered_true(self):
s = Categorical([0, 1.2, 2], ordered=True)
s2 = Categorical([0, 1.2, 2], ordered=True)
result = union_categoricals([s, s2])
expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True)
tm.assert_categorical_equal(result, expected)

def test_union_categorical_match_types(self):
    """Unioning categoricals whose category dtypes differ raises TypeError."""
    # must exactly match types
    s = Categorical([0, 1.2, 2])
    s2 = Categorical([2, 3, 4])
    msg = "dtype of categories must be the same"
    with pytest.raises(TypeError, match=msg):
        union_categoricals([s, s2])

def test_union_categorical_empty(self):
msg = "No Categoricals to union"
with pytest.raises(ValueError, match=msg):
union_categoricals([])
Expand Down Expand Up @@ -117,14 +122,11 @@ def test_union_categoricals_nan(self):
exp = Categorical([np.nan, np.nan, np.nan, np.nan])
tm.assert_categorical_equal(res, exp)

def test_union_categoricals_empty(self):
@pytest.mark.parametrize("val", [[], ["1"]])
def test_union_categoricals_empty(self, val):
# GH 13759
res = union_categoricals([Categorical([]), Categorical([])])
exp = Categorical([])
tm.assert_categorical_equal(res, exp)

res = union_categoricals([Categorical([]), Categorical(["1"])])
exp = Categorical(["1"])
res = union_categoricals([Categorical([]), Categorical(val)])
exp = Categorical(val)
tm.assert_categorical_equal(res, exp)

def test_union_categorical_same_category(self):
Expand All @@ -135,6 +137,7 @@ def test_union_categorical_same_category(self):
exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], categories=[1, 2, 3, 4])
tm.assert_categorical_equal(res, exp)

def test_union_categorical_same_category_str(self):
c1 = Categorical(["z", "z", "z"], categories=["x", "y", "z"])
c2 = Categorical(["x", "x", "x"], categories=["x", "y", "z"])
res = union_categoricals([c1, c2])
Expand Down Expand Up @@ -293,38 +296,44 @@ def test_union_categoricals_sort_false(self):
)
tm.assert_categorical_equal(result, expected)

def test_union_categoricals_sort_false_fastpath(self):
# fastpath
c1 = Categorical(["a", "b"], categories=["b", "a", "c"])
c2 = Categorical(["b", "c"], categories=["b", "a", "c"])
result = union_categoricals([c1, c2], sort_categories=False)
expected = Categorical(["a", "b", "b", "c"], categories=["b", "a", "c"])
tm.assert_categorical_equal(result, expected)

def test_union_categoricals_sort_false_skipresort(self):
# fastpath - skip resort
c1 = Categorical(["a", "b"], categories=["a", "b", "c"])
c2 = Categorical(["b", "c"], categories=["a", "b", "c"])
result = union_categoricals([c1, c2], sort_categories=False)
expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"])
tm.assert_categorical_equal(result, expected)

def test_union_categoricals_sort_false_one_nan(self):
c1 = Categorical(["x", np.nan])
c2 = Categorical([np.nan, "b"])
result = union_categoricals([c1, c2], sort_categories=False)
expected = Categorical(["x", np.nan, np.nan, "b"], categories=["x", "b"])
tm.assert_categorical_equal(result, expected)

def test_union_categoricals_sort_false_only_nan(self):
c1 = Categorical([np.nan])
c2 = Categorical([np.nan])
result = union_categoricals([c1, c2], sort_categories=False)
expected = Categorical([np.nan, np.nan])
tm.assert_categorical_equal(result, expected)

def test_union_categoricals_sort_false_empty(self):
c1 = Categorical([])
c2 = Categorical([])
result = union_categoricals([c1, c2], sort_categories=False)
expected = Categorical([])
tm.assert_categorical_equal(result, expected)

def test_union_categoricals_sort_false_ordered_true(self):
c1 = Categorical(["b", "a"], categories=["b", "a", "c"], ordered=True)
c2 = Categorical(["a", "c"], categories=["b", "a", "c"], ordered=True)
result = union_categoricals([c1, c2], sort_categories=False)
Expand Down
17 changes: 8 additions & 9 deletions pandas/tests/reshape/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,16 @@ def test_tzaware_retained_categorical(self):
expected = x.repeat(2)
tm.assert_index_equal(result1, expected)

def test_empty(self):
@pytest.mark.parametrize("x, y", [[[], []], [[0, 1], []], [[], ["a", "b", "c"]]])
def test_empty(self, x, y):
# product of empty factors
X = [[], [0, 1], []]
Y = [[], [], ["a", "b", "c"]]
for x, y in zip(X, Y):
expected1 = np.array([], dtype=np.asarray(x).dtype)
expected2 = np.array([], dtype=np.asarray(y).dtype)
result1, result2 = cartesian_product([x, y])
tm.assert_numpy_array_equal(result1, expected1)
tm.assert_numpy_array_equal(result2, expected2)
expected1 = np.array([], dtype=np.asarray(x).dtype)
expected2 = np.array([], dtype=np.asarray(y).dtype)
result1, result2 = cartesian_product([x, y])
tm.assert_numpy_array_equal(result1, expected1)
tm.assert_numpy_array_equal(result2, expected2)

def test_empty_input(self):
# empty product (empty input):
result = cartesian_product([])
expected = []
Expand Down