Skip to content

Commit d8d1dc9

Browse files
authored
REF/CLN: test_get_dummies (#33184)
1 parent af0c878 commit d8d1dc9

File tree

2 files changed

+71
-76
lines changed

2 files changed

+71
-76
lines changed

pandas/tests/reshape/test_reshape.py renamed to pandas/tests/reshape/test_get_dummies.py

+54 -76
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from pandas.core.dtypes.common import is_integer_dtype
77

88
import pandas as pd
9-
from pandas import Categorical, DataFrame, Index, Series, get_dummies
9+
from pandas import Categorical, CategoricalIndex, DataFrame, Series, get_dummies
1010
import pandas._testing as tm
1111
from pandas.core.arrays.sparse import SparseArray, SparseDtype
1212

@@ -31,11 +31,11 @@ def effective_dtype(self, dtype):
3131
return np.uint8
3232
return dtype
3333

34-
def test_raises_on_dtype_object(self, df):
34+
def test_get_dummies_raises_on_dtype_object(self, df):
3535
with pytest.raises(ValueError):
3636
get_dummies(df, dtype="object")
3737

38-
def test_basic(self, sparse, dtype):
38+
def test_get_dummies_basic(self, sparse, dtype):
3939
s_list = list("abc")
4040
s_series = Series(s_list)
4141
s_series_index = Series(s_list, list("ABC"))
@@ -56,7 +56,7 @@ def test_basic(self, sparse, dtype):
5656
result = get_dummies(s_series_index, sparse=sparse, dtype=dtype)
5757
tm.assert_frame_equal(result, expected)
5858

59-
def test_basic_types(self, sparse, dtype):
59+
def test_get_dummies_basic_types(self, sparse, dtype):
6060
# GH 10531
6161
s_list = list("abc")
6262
s_series = Series(s_list)
@@ -106,7 +106,7 @@ def test_basic_types(self, sparse, dtype):
106106
result = result.sort_index()
107107
tm.assert_series_equal(result, expected)
108108

109-
def test_just_na(self, sparse):
109+
def test_get_dummies_just_na(self, sparse):
110110
just_na_list = [np.nan]
111111
just_na_series = Series(just_na_list)
112112
just_na_series_index = Series(just_na_list, index=["A"])
@@ -123,7 +123,7 @@ def test_just_na(self, sparse):
123123
assert res_series.index.tolist() == [0]
124124
assert res_series_index.index.tolist() == ["A"]
125125

126-
def test_include_na(self, sparse, dtype):
126+
def test_get_dummies_include_na(self, sparse, dtype):
127127
s = ["a", "b", np.nan]
128128
res = get_dummies(s, sparse=sparse, dtype=dtype)
129129
exp = DataFrame(
@@ -152,7 +152,7 @@ def test_include_na(self, sparse, dtype):
152152
)
153153
tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values)
154154

155-
def test_unicode(self, sparse):
155+
def test_get_dummies_unicode(self, sparse):
156156
# See GH 6885 - get_dummies chokes on unicode values
157157
import unicodedata
158158

@@ -175,7 +175,7 @@ def test_dataframe_dummies_all_obj(self, df, sparse):
175175
dtype=np.uint8,
176176
)
177177
if sparse:
178-
expected = pd.DataFrame(
178+
expected = DataFrame(
179179
{
180180
"A_a": SparseArray([1, 0, 1], dtype="uint8"),
181181
"A_b": SparseArray([0, 1, 0], dtype="uint8"),
@@ -223,7 +223,7 @@ def test_dataframe_dummies_prefix_list(self, df, sparse):
223223
cols = ["from_A_a", "from_A_b", "from_B_b", "from_B_c"]
224224
expected = expected[["C"] + cols]
225225

226-
typ = SparseArray if sparse else pd.Series
226+
typ = SparseArray if sparse else Series
227227
expected[cols] = expected[cols].apply(lambda x: typ(x))
228228
tm.assert_frame_equal(result, expected)
229229

@@ -242,11 +242,11 @@ def test_dataframe_dummies_prefix_str(self, df, sparse):
242242
# https://github.com/pandas-dev/pandas/issues/14427
243243
expected = pd.concat(
244244
[
245-
pd.Series([1, 2, 3], name="C"),
246-
pd.Series([1, 0, 1], name="bad_a", dtype="Sparse[uint8]"),
247-
pd.Series([0, 1, 0], name="bad_b", dtype="Sparse[uint8]"),
248-
pd.Series([1, 1, 0], name="bad_b", dtype="Sparse[uint8]"),
249-
pd.Series([0, 0, 1], name="bad_c", dtype="Sparse[uint8]"),
245+
Series([1, 2, 3], name="C"),
246+
Series([1, 0, 1], name="bad_a", dtype="Sparse[uint8]"),
247+
Series([0, 1, 0], name="bad_b", dtype="Sparse[uint8]"),
248+
Series([1, 1, 0], name="bad_b", dtype="Sparse[uint8]"),
249+
Series([0, 0, 1], name="bad_c", dtype="Sparse[uint8]"),
250250
],
251251
axis=1,
252252
)
@@ -267,7 +267,7 @@ def test_dataframe_dummies_subset(self, df, sparse):
267267
expected[["C"]] = df[["C"]]
268268
if sparse:
269269
cols = ["from_A_a", "from_A_b"]
270-
expected[cols] = expected[cols].astype(pd.SparseDtype("uint8", 0))
270+
expected[cols] = expected[cols].astype(SparseDtype("uint8", 0))
271271
tm.assert_frame_equal(result, expected)
272272

273273
def test_dataframe_dummies_prefix_sep(self, df, sparse):
@@ -286,7 +286,7 @@ def test_dataframe_dummies_prefix_sep(self, df, sparse):
286286
expected = expected[["C", "A..a", "A..b", "B..b", "B..c"]]
287287
if sparse:
288288
cols = ["A..a", "A..b", "B..b", "B..c"]
289-
expected[cols] = expected[cols].astype(pd.SparseDtype("uint8", 0))
289+
expected[cols] = expected[cols].astype(SparseDtype("uint8", 0))
290290

291291
tm.assert_frame_equal(result, expected)
292292

@@ -323,7 +323,7 @@ def test_dataframe_dummies_prefix_dict(self, sparse):
323323
columns = ["from_A_a", "from_A_b", "from_B_b", "from_B_c"]
324324
expected[columns] = expected[columns].astype(np.uint8)
325325
if sparse:
326-
expected[columns] = expected[columns].astype(pd.SparseDtype("uint8", 0))
326+
expected[columns] = expected[columns].astype(SparseDtype("uint8", 0))
327327

328328
tm.assert_frame_equal(result, expected)
329329

@@ -359,7 +359,7 @@ def test_dataframe_dummies_with_na(self, df, sparse, dtype):
359359
tm.assert_frame_equal(result, expected)
360360

361361
def test_dataframe_dummies_with_categorical(self, df, sparse, dtype):
362-
df["cat"] = pd.Categorical(["x", "y", "y"])
362+
df["cat"] = Categorical(["x", "y", "y"])
363363
result = get_dummies(df, sparse=sparse, dtype=dtype).sort_index(axis=1)
364364
if sparse:
365365
arr = SparseArray
@@ -386,30 +386,30 @@ def test_dataframe_dummies_with_categorical(self, df, sparse, dtype):
386386
"get_dummies_kwargs,expected",
387387
[
388388
(
389-
{"data": pd.DataFrame(({"ä": ["a"]}))},
390-
pd.DataFrame({"ä_a": [1]}, dtype=np.uint8),
389+
{"data": DataFrame(({"ä": ["a"]}))},
390+
DataFrame({"ä_a": [1]}, dtype=np.uint8),
391391
),
392392
(
393-
{"data": pd.DataFrame({"x": ["ä"]})},
394-
pd.DataFrame({"x_ä": [1]}, dtype=np.uint8),
393+
{"data": DataFrame({"x": ["ä"]})},
394+
DataFrame({"x_ä": [1]}, dtype=np.uint8),
395395
),
396396
(
397-
{"data": pd.DataFrame({"x": ["a"]}), "prefix": "ä"},
398-
pd.DataFrame({"ä_a": [1]}, dtype=np.uint8),
397+
{"data": DataFrame({"x": ["a"]}), "prefix": "ä"},
398+
DataFrame({"ä_a": [1]}, dtype=np.uint8),
399399
),
400400
(
401-
{"data": pd.DataFrame({"x": ["a"]}), "prefix_sep": "ä"},
402-
pd.DataFrame({"xäa": [1]}, dtype=np.uint8),
401+
{"data": DataFrame({"x": ["a"]}), "prefix_sep": "ä"},
402+
DataFrame({"xäa": [1]}, dtype=np.uint8),
403403
),
404404
],
405405
)
406406
def test_dataframe_dummies_unicode(self, get_dummies_kwargs, expected):
407-
# GH22084 pd.get_dummies incorrectly encodes unicode characters
407+
# GH22084 get_dummies incorrectly encodes unicode characters
408408
# in dataframe column names
409409
result = get_dummies(**get_dummies_kwargs)
410410
tm.assert_frame_equal(result, expected)
411411

412-
def test_basic_drop_first(self, sparse):
412+
def test_get_dummies_basic_drop_first(self, sparse):
413413
# GH12402 Add a new parameter `drop_first` to avoid collinearity
414414
# Basic case
415415
s_list = list("abc")
@@ -430,7 +430,7 @@ def test_basic_drop_first(self, sparse):
430430
result = get_dummies(s_series_index, drop_first=True, sparse=sparse)
431431
tm.assert_frame_equal(result, expected)
432432

433-
def test_basic_drop_first_one_level(self, sparse):
433+
def test_get_dummies_basic_drop_first_one_level(self, sparse):
434434
# Test the case that categorical variable only has one level.
435435
s_list = list("aaa")
436436
s_series = Series(s_list)
@@ -448,7 +448,7 @@ def test_basic_drop_first_one_level(self, sparse):
448448
result = get_dummies(s_series_index, drop_first=True, sparse=sparse)
449449
tm.assert_frame_equal(result, expected)
450450

451-
def test_basic_drop_first_NA(self, sparse):
451+
def test_get_dummies_basic_drop_first_NA(self, sparse):
452452
# Test NA handling together with drop_first
453453
s_NA = ["a", "b", np.nan]
454454
res = get_dummies(s_NA, drop_first=True, sparse=sparse)
@@ -481,7 +481,7 @@ def test_dataframe_dummies_drop_first(self, df, sparse):
481481
tm.assert_frame_equal(result, expected)
482482

483483
def test_dataframe_dummies_drop_first_with_categorical(self, df, sparse, dtype):
484-
df["cat"] = pd.Categorical(["x", "y", "y"])
484+
df["cat"] = Categorical(["x", "y", "y"])
485485
result = get_dummies(df, drop_first=True, sparse=sparse)
486486
expected = DataFrame(
487487
{"C": [1, 2, 3], "A_b": [0, 1, 0], "B_c": [0, 0, 1], "cat_y": [0, 1, 1]}
@@ -521,24 +521,24 @@ def test_dataframe_dummies_drop_first_with_na(self, df, sparse):
521521
expected = expected[["C", "A_b", "B_c"]]
522522
tm.assert_frame_equal(result, expected)
523523

524-
def test_int_int(self):
524+
def test_get_dummies_int_int(self):
525525
data = Series([1, 2, 1])
526-
result = pd.get_dummies(data)
526+
result = get_dummies(data)
527527
expected = DataFrame([[1, 0], [0, 1], [1, 0]], columns=[1, 2], dtype=np.uint8)
528528
tm.assert_frame_equal(result, expected)
529529

530-
data = Series(pd.Categorical(["a", "b", "a"]))
531-
result = pd.get_dummies(data)
530+
data = Series(Categorical(["a", "b", "a"]))
531+
result = get_dummies(data)
532532
expected = DataFrame(
533-
[[1, 0], [0, 1], [1, 0]], columns=pd.Categorical(["a", "b"]), dtype=np.uint8
533+
[[1, 0], [0, 1], [1, 0]], columns=Categorical(["a", "b"]), dtype=np.uint8
534534
)
535535
tm.assert_frame_equal(result, expected)
536536

537-
def test_int_df(self, dtype):
537+
def test_get_dummies_int_df(self, dtype):
538538
data = DataFrame(
539539
{
540540
"A": [1, 2, 1],
541-
"B": pd.Categorical(["a", "b", "a"]),
541+
"B": Categorical(["a", "b", "a"]),
542542
"C": [1, 2, 1],
543543
"D": [1.0, 2.0, 1.0],
544544
}
@@ -549,22 +549,22 @@ def test_int_df(self, dtype):
549549
columns=columns,
550550
)
551551
expected[columns[2:]] = expected[columns[2:]].astype(dtype)
552-
result = pd.get_dummies(data, columns=["A", "B"], dtype=dtype)
552+
result = get_dummies(data, columns=["A", "B"], dtype=dtype)
553553
tm.assert_frame_equal(result, expected)
554554

555-
def test_dataframe_dummies_preserve_categorical_dtype(self, dtype):
555+
@pytest.mark.parametrize("ordered", [True, False])
556+
def test_dataframe_dummies_preserve_categorical_dtype(self, dtype, ordered):
556557
# GH13854
557-
for ordered in [False, True]:
558-
cat = pd.Categorical(list("xy"), categories=list("xyz"), ordered=ordered)
559-
result = get_dummies(cat, dtype=dtype)
558+
cat = Categorical(list("xy"), categories=list("xyz"), ordered=ordered)
559+
result = get_dummies(cat, dtype=dtype)
560560

561-
data = np.array([[1, 0, 0], [0, 1, 0]], dtype=self.effective_dtype(dtype))
562-
cols = pd.CategoricalIndex(
563-
cat.categories, categories=cat.categories, ordered=ordered
564-
)
565-
expected = DataFrame(data, columns=cols, dtype=self.effective_dtype(dtype))
561+
data = np.array([[1, 0, 0], [0, 1, 0]], dtype=self.effective_dtype(dtype))
562+
cols = CategoricalIndex(
563+
cat.categories, categories=cat.categories, ordered=ordered
564+
)
565+
expected = DataFrame(data, columns=cols, dtype=self.effective_dtype(dtype))
566566

567-
tm.assert_frame_equal(result, expected)
567+
tm.assert_frame_equal(result, expected)
568568

569569
@pytest.mark.parametrize("sparse", [True, False])
570570
def test_get_dummies_dont_sparsify_all_columns(self, sparse):
@@ -593,10 +593,10 @@ def test_get_dummies_duplicate_columns(self, df):
593593
tm.assert_frame_equal(result, expected)
594594

595595
def test_get_dummies_all_sparse(self):
596-
df = pd.DataFrame({"A": [1, 2]})
597-
result = pd.get_dummies(df, columns=["A"], sparse=True)
596+
df = DataFrame({"A": [1, 2]})
597+
result = get_dummies(df, columns=["A"], sparse=True)
598598
dtype = SparseDtype("uint8", 0)
599-
expected = pd.DataFrame(
599+
expected = DataFrame(
600600
{
601601
"A_1": SparseArray([1, 0], dtype=dtype),
602602
"A_2": SparseArray([0, 1], dtype=dtype),
@@ -607,7 +607,7 @@ def test_get_dummies_all_sparse(self):
607607
@pytest.mark.parametrize("values", ["baz"])
608608
def test_get_dummies_with_string_values(self, values):
609609
# issue #28383
610-
df = pd.DataFrame(
610+
df = DataFrame(
611611
{
612612
"bar": [1, 2, 3, 4, 5, 6],
613613
"foo": ["one", "one", "one", "two", "two", "two"],
@@ -619,26 +619,4 @@ def test_get_dummies_with_string_values(self, values):
619619
msg = "Input must be a list-like for parameter `columns`"
620620

621621
with pytest.raises(TypeError, match=msg):
622-
pd.get_dummies(df, columns=values)
623-
624-
625-
class TestCategoricalReshape:
626-
def test_reshaping_multi_index_categorical(self):
627-
628-
cols = ["ItemA", "ItemB", "ItemC"]
629-
data = {c: tm.makeTimeDataFrame() for c in cols}
630-
df = pd.concat({c: data[c].stack() for c in data}, axis="columns")
631-
df.index.names = ["major", "minor"]
632-
df["str"] = "foo"
633-
634-
df["category"] = df["str"].astype("category")
635-
result = df["category"].unstack()
636-
637-
dti = df.index.levels[0]
638-
c = Categorical(["foo"] * len(dti))
639-
expected = DataFrame(
640-
{"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
641-
columns=Index(list("ABCD"), name="minor"),
642-
index=dti.rename("major"),
643-
)
644-
tm.assert_frame_equal(result, expected)
622+
get_dummies(df, columns=values)

pandas/tests/series/methods/test_unstack.py

+17
Original file line number | Diff line number | Diff line change
@@ -118,3 +118,20 @@ def test_unstack_mixed_type_name_in_multiindex(
118118
expected_values, columns=expected_columns, index=expected_index,
119119
)
120120
tm.assert_frame_equal(result, expected)
121+
122+
123+
def test_unstack_multi_index_categorical_values():
124+
125+
mi = tm.makeTimeDataFrame().stack().index.rename(["major", "minor"])
126+
ser = pd.Series(["foo"] * len(mi), index=mi, name="category", dtype="category")
127+
128+
result = ser.unstack()
129+
130+
dti = ser.index.levels[0]
131+
c = pd.Categorical(["foo"] * len(dti))
132+
expected = DataFrame(
133+
{"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
134+
columns=pd.Index(list("ABCD"), name="minor"),
135+
index=dti.rename("major"),
136+
)
137+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)