Skip to content

Commit 7afd976

Browse files
Justin SolinskyJustin Solinsky
Justin Solinsky
authored and
Justin Solinsky
committed
ENH union_categoricals supports ignore_order GH13410
1 parent 198fc8f commit 7afd976

File tree

4 files changed

+25
-11
lines changed

4 files changed

+25
-11
lines changed

doc/source/categorical.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,17 @@ The below raises ``TypeError`` because the categories are ordered and not identi
693693
Out[3]:
694694
TypeError: to union ordered Categoricals, all categories must be the same
695695
696+
.. versionadded:: 0.20.0
697+
698+
Ordered categoricals with different categories or orderings can be combined by
699+
using the ``ignore_order=True`` argument.
700+
701+
.. ipython:: python
702+
703+
a = pd.Categorical(["a", "b", "c"], ordered=True)
704+
b = pd.Categorical(["c", "b", "a"], ordered=True)
705+
union_categoricals([a, b], ignore_order=True)
706+
696707
``union_categoricals`` also works with a ``CategoricalIndex``, or ``Series`` containing
697708
categorical data, but note that the resulting array will always be a plain ``Categorical``
698709

doc/source/whatsnew/v0.20.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ Other enhancements
137137

138138
- ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`)
139139
- ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`)
140-
140+
- ``ignore_order`` argument added to ``pd.types.concat.union_categoricals``; setting the argument to ``True`` will ignore the ordered attribute of unioned categoricals (:issue:`13410`)
141141

142142
.. _whatsnew_0200.api_breaking:
143143

pandas/tools/tests/test_concat.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1675,14 +1675,14 @@ def test_union_categoricals_ignore_order(self):
16751675
tm.assert_categorical_equal(res, exp)
16761676

16771677
res = union_categoricals([c1, c1], ignore_order=True)
1678-
exp = Categorical([1, 2, 3, 1, 2, 3], ordered=False)
1678+
exp = Categorical([1, 2, 3, 1, 2, 3])
16791679
tm.assert_categorical_equal(res, exp)
16801680

16811681
c1 = Categorical([1, 2, 3, np.nan], ordered=True)
16821682
c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)
16831683

16841684
res = union_categoricals([c1, c2], ignore_order=True)
1685-
exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=False)
1685+
exp = Categorical([1, 2, 3, np.nan, 3, 2])
16861686
tm.assert_categorical_equal(res, exp)
16871687

16881688
c1 = Categorical([1, 2, 3], ordered=True)
@@ -1692,13 +1692,16 @@ def test_union_categoricals_ignore_order(self):
16921692
exp = Categorical([1, 2, 3, 1, 2, 3])
16931693
tm.assert_categorical_equal(res, exp)
16941694

1695-
c1 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)
1696-
c2 = Categorical([1, 2, 3], ordered=True)
1697-
1698-
res = union_categoricals([c1, c2], ignore_order=True, sort_categories=True)
1695+
res = union_categoricals([c2, c1], ignore_order=True, sort_categories=True)
16991696
exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
17001697
tm.assert_categorical_equal(res, exp)
17011698

1699+
c1 = Categorical([1, 2, 3], ordered=True)
1700+
c2 = Categorical([4, 5, 6], ordered=True)
1701+
result = union_categoricals([c1, c2], ignore_order=True)
1702+
expected = Categorical([1, 2, 3, 4, 5, 6])
1703+
tm.assert_categorical_equal(result, expected)
1704+
17021705
def test_union_categoricals_sort(self):
17031706
# GH 13846
17041707
c1 = Categorical(['x', 'y', 'z'])

pandas/types/concat.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,8 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False):
223223
If true, resulting categories will be lexsorted, otherwise
224224
they will be ordered as they appear in the data.
225225
ignore_order: boolean, default False
226-
If true, ordered categories will be ignored. Results in
227-
an unordered categorical.
226+
If true, the ordered attribute of the Categoricals will be ignored.
227+
Results in an unordered categorical.
228228
229229
Returns
230230
-------
@@ -238,7 +238,7 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False):
238238
- all inputs are ordered and their categories are not identical
239239
- sort_categories=True and Categoricals are ordered
240240
ValueError
241-
Emmpty list of categoricals passed
241+
Empty list of categoricals passed
242242
"""
243243
from pandas import Index, Categorical, CategoricalIndex, Series
244244

@@ -275,7 +275,7 @@ def _maybe_unwrap(x):
275275
categories = categories.sort_values()
276276
indexer = categories.get_indexer(first.categories)
277277
new_codes = take_1d(indexer, new_codes, fill_value=-1)
278-
elif ignore_order | all(not c.ordered for c in to_union):
278+
elif ignore_order or all(not c.ordered for c in to_union):
279279
# different categories - union and recode
280280
cats = first.categories.append([c.categories for c in to_union[1:]])
281281
categories = Index(cats.unique())

0 commit comments

Comments
 (0)