Skip to content

Commit 198fc8f

Browse files
Justin Solinsky authored and committed
ENH union_categoricals supports ignore_order GH13410
1 parent 684c4d5 commit 198fc8f

File tree

2 files changed: 42 additions and 3 deletions

pandas/tools/tests/test_concat.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,6 +1666,39 @@ def test_union_categoricals_ordered(self):
16661666
with tm.assertRaisesRegexp(TypeError, msg):
16671667
union_categoricals([c1, c2])
16681668

1669+
def test_union_categoricals_ignore_order(self):
1670+
c1 = Categorical([1, 2, 3], ordered=True)
1671+
c2 = Categorical([1, 2, 3], ordered=False)
1672+
1673+
res = union_categoricals([c1, c2], ignore_order=True)
1674+
exp = Categorical([1, 2, 3, 1, 2, 3])
1675+
tm.assert_categorical_equal(res, exp)
1676+
1677+
res = union_categoricals([c1, c1], ignore_order=True)
1678+
exp = Categorical([1, 2, 3, 1, 2, 3], ordered=False)
1679+
tm.assert_categorical_equal(res, exp)
1680+
1681+
c1 = Categorical([1, 2, 3, np.nan], ordered=True)
1682+
c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)
1683+
1684+
res = union_categoricals([c1, c2], ignore_order=True)
1685+
exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=False)
1686+
tm.assert_categorical_equal(res, exp)
1687+
1688+
c1 = Categorical([1, 2, 3], ordered=True)
1689+
c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)
1690+
1691+
res = union_categoricals([c1, c2], ignore_order=True)
1692+
exp = Categorical([1, 2, 3, 1, 2, 3])
1693+
tm.assert_categorical_equal(res, exp)
1694+
1695+
c1 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)
1696+
c2 = Categorical([1, 2, 3], ordered=True)
1697+
1698+
res = union_categoricals([c1, c2], ignore_order=True, sort_categories=True)
1699+
exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
1700+
tm.assert_categorical_equal(res, exp)
1701+
16691702
def test_union_categoricals_sort(self):
16701703
# GH 13846
16711704
c1 = Categorical(['x', 'y', 'z'])

pandas/types/concat.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def _concat_asobject(to_concat):
208208
return _concat_asobject(to_concat)
209209

210210

211-
def union_categoricals(to_union, sort_categories=False):
211+
def union_categoricals(to_union, sort_categories=False, ignore_order=False):
212212
"""
213213
Combine list-like of Categorical-like, unioning categories. All
214214
categories must have the same dtype.
@@ -222,6 +222,9 @@ def union_categoricals(to_union, sort_categories=False):
222222
sort_categories : boolean, default False
223223
If true, resulting categories will be lexsorted, otherwise
224224
they will be ordered as they appear in the data.
225+
ignore_order : boolean, default False
226+
If true, the ordered attribute of the inputs is ignored. Results in
227+
an unordered categorical.
225228
226229
Returns
227230
-------
@@ -264,15 +267,15 @@ def _maybe_unwrap(x):
264267
ordered = first.ordered
265268
new_codes = np.concatenate([c.codes for c in to_union])
266269

267-
if sort_categories and ordered:
270+
if sort_categories and not ignore_order and ordered:
268271
raise TypeError("Cannot use sort_categories=True with "
269272
"ordered Categoricals")
270273

271274
if sort_categories and not categories.is_monotonic_increasing:
272275
categories = categories.sort_values()
273276
indexer = categories.get_indexer(first.categories)
274277
new_codes = take_1d(indexer, new_codes, fill_value=-1)
275-
elif all(not c.ordered for c in to_union):
278+
elif ignore_order | all(not c.ordered for c in to_union):
276279
# different categories - union and recode
277280
cats = first.categories.append([c.categories for c in to_union[1:]])
278281
categories = Index(cats.unique())
@@ -297,6 +300,9 @@ def _maybe_unwrap(x):
297300
else:
298301
raise TypeError('Categorical.ordered must be the same')
299302

303+
if ignore_order:
304+
ordered = False
305+
300306
return Categorical(new_codes, categories=categories, ordered=ordered,
301307
fastpath=True)
302308

0 commit comments

Comments
 (0)