Skip to content

Commit 0528c57

Browse files
committed
Compare with empty DataFrame, not just check empty
1 parent 0d99c2a commit 0528c57

File tree

2 files changed

+30
-8
lines changed

2 files changed

+30
-8
lines changed

pandas/core/reshape.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,8 +1095,7 @@ def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False,
10951095
cat = Categorical.from_array(Series(data), ordered=True)
10961096
levels = cat.categories
10971097

1098-
# if all NaN
1099-
if not dummy_na and len(levels) == 0:
1098+
def get_empty_Frame(data, sparse):
11001099
if isinstance(data, Series):
11011100
index = data.index
11021101
else:
@@ -1106,11 +1105,19 @@ def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False,
11061105
else:
11071106
return SparseDataFrame(index=index)
11081107

1108+
# if all NaN
1109+
if not dummy_na and len(levels) == 0:
1110+
return get_empty_Frame(data, sparse)
1111+
11091112
codes = cat.codes.copy()
11101113
if dummy_na:
11111114
codes[codes == -1] = len(cat.categories)
11121115
levels = np.append(cat.categories, np.nan)
11131116

1117+
# With dummy_na the only level may be the fake NaN level; drop_first would drop it again, leaving no columns
1118+
if drop_first and len(levels) == 1:
1119+
return get_empty_Frame(data, sparse)
1120+
11141121
number_of_cols = len(levels)
11151122

11161123
if prefix is not None:

pandas/tests/test_reshape.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -432,13 +432,28 @@ def test_basic_drop_first(self):
432432
assert_frame_equal(result, expected)
433433

434434
expected.index = list('ABC')
435-
result = get_dummies(s_series_index, sparse=self.sparse, drop_first=True)
435+
result = get_dummies(s_series_index, sparse=self.sparse,
436+
drop_first=True)
436437
assert_frame_equal(result, expected)
437438

438-
# Test the case that categorical variable only has one level.
439439
def test_basic_drop_first_one_level(self):
440-
result = get_dummies(list('aaa'), sparse=self.sparse, drop_first=True)
441-
self.assertEqual(result.empty, True)
440+
# Test the case that categorical variable only has one level.
441+
s_list = list('aaa')
442+
s_series = Series(s_list)
443+
s_series_index = Series(s_list, list('ABC'))
444+
445+
expected = DataFrame(index=np.arange(3))
446+
447+
result = get_dummies(s_list, sparse=self.sparse, drop_first=True)
448+
assert_frame_equal(result, expected)
449+
450+
result = get_dummies(s_series, sparse=self.sparse, drop_first=True)
451+
assert_frame_equal(result, expected)
452+
453+
expected = DataFrame(index=list('ABC'))
454+
result = get_dummies(s_series_index, sparse=self.sparse,
455+
drop_first=True)
456+
assert_frame_equal(result, expected)
442457

443458
def test_basic_drop_first_NA(self):
444459
# Test NA handling together with drop_first
@@ -449,7 +464,6 @@ def test_basic_drop_first_NA(self):
449464
2: 0.0}})
450465
assert_frame_equal(res, exp)
451466

452-
# Sparse dataframes do not allow nan labelled columns, see #GH8822
453467
res_na = get_dummies(s_NA, dummy_na=True, sparse=self.sparse,
454468
drop_first=True)
455469
exp_na = DataFrame({'b': {0: 0.0,
@@ -463,7 +477,8 @@ def test_basic_drop_first_NA(self):
463477

464478
res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse,
465479
drop_first=True)
466-
tm.assert_numpy_array_equal(res_just_na.empty, True)
480+
exp_just_na = DataFrame(index=np.arange(1))
481+
assert_frame_equal(res_just_na, exp_just_na)
467482

468483
def test_dataframe_dummies_drop_first(self):
469484
df = self.df[['A', 'B']]

0 commit comments

Comments
 (0)