Skip to content

Commit b1a9421

Browse files
TomAugspurger authored and jreback committed
BUG: Fixed union_categoricals with unordered cats (#19097)
* BUG: Fixed union_categoricals with unordered cats

  Closes #19096

* TST: Added concat test
1 parent e1d525c commit b1a9421

File tree

4 files changed

+32
-2
lines changed

4 files changed

+32
-2
lines changed

doc/source/whatsnew/v0.23.0.txt

+4-1
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,11 @@ Numeric
453453
Categorical
454454
^^^^^^^^^^^
455455

456-
- Bug in ``Categorical.equals`` between two unordered categories with the same categories, but in a different order (:issue:`16603`)
457456
-
457+
- Bug in :func:`pandas.api.types.union_categoricals` returning the wrong result
458+
when all the categoricals had the same categories, but in a different order.
459+
This affected :func:`pandas.concat` with Categorical data (:issue:`19096`).
460+
- Bug in ``Categorical.equals`` between two unordered categories with the same categories, but in a different order (:issue:`16603`)
458461
-
459462

460463
Other

pandas/core/dtypes/concat.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,16 @@ def _maybe_unwrap(x):
339339
# identical categories - fastpath
340340
categories = first.categories
341341
ordered = first.ordered
342-
new_codes = np.concatenate([c.codes for c in to_union])
342+
343+
if all(first.categories.equals(other.categories)
344+
for other in to_union[1:]):
345+
new_codes = np.concatenate([c.codes for c in to_union])
346+
else:
347+
codes = [first.codes] + [_recode_for_categories(other.codes,
348+
other.categories,
349+
first.categories)
350+
for other in to_union[1:]]
351+
new_codes = np.concatenate(codes)
343352

344353
if sort_categories and not ignore_order and ordered:
345354
raise TypeError("Cannot use sort_categories=True with "

pandas/tests/reshape/test_concat.py

+9
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,15 @@ def test_concat_categorical(self):
481481
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
482482
tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
483483

484+
def test_union_categorical_same_categories_different_order(self):
485+
# https://github.com/pandas-dev/pandas/issues/19096
486+
a = pd.Series(Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c']))
487+
b = pd.Series(Categorical(['a', 'b', 'c'], categories=['b', 'a', 'c']))
488+
result = pd.concat([a, b], ignore_index=True)
489+
expected = pd.Series(Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
490+
categories=['a', 'b', 'c']))
491+
tm.assert_series_equal(result, expected)
492+
484493
def test_concat_categorical_coercion(self):
485494
# GH 13524
486495

pandas/tests/reshape/test_union_categoricals.py

+9
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,15 @@ def test_union_categorical_same_category(self):
129129
categories=['x', 'y', 'z'])
130130
tm.assert_categorical_equal(res, exp)
131131

132+
def test_union_categorical_same_categories_different_order(self):
133+
# https://github.com/pandas-dev/pandas/issues/19096
134+
c1 = Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c'])
135+
c2 = Categorical(['a', 'b', 'c'], categories=['b', 'a', 'c'])
136+
result = union_categoricals([c1, c2])
137+
expected = Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
138+
categories=['a', 'b', 'c'])
139+
tm.assert_categorical_equal(result, expected)
140+
132141
def test_union_categoricals_ordered(self):
133142
c1 = Categorical([1, 2, 3], ordered=True)
134143
c2 = Categorical([1, 2, 3], ordered=False)

0 commit comments

Comments
 (0)