Skip to content

Commit 026ea8c

Browse files
committed
Merge pull request #6016 from TomAugspurger/symmetric-difference
ENH: Add sym_diff for index
2 parents 0f73f5f + 3bee2a0 commit 026ea8c

File tree

4 files changed

+113
-4
lines changed

4 files changed

+113
-4
lines changed

doc/source/indexing.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1504,6 +1504,18 @@ operators:
15041504
a & b
15051505
a - b
15061506
1507+
Also available is the ``sym_diff`` operation (operator ``^``), which returns elements
1508+
that appear in either ``idx1`` or ``idx2`` but not both. This is
1509+
equivalent to the Index created by ``(idx1 - idx2) + (idx2 - idx1)``,
1510+
with duplicates dropped.
1511+
1512+
.. ipython:: python
1513+
1514+
idx1 = Index([1, 2, 3, 4])
1515+
idx2 = Index([2, 3, 4, 5])
1516+
idx1.sym_diff(idx2)
1517+
idx1 ^ idx2
1518+
15071519
The ``isin`` method of Index objects
15081520
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
15091521

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ New features
5454
~~~~~~~~~~~~
5555

5656
- Hexagonal bin plots from ``DataFrame.plot`` with ``kind='hexbin'`` (:issue:`5478`)
57+
- Added the ``sym_diff`` method to ``Index`` (:issue:`5543`)
5758

5859
API Changes
5960
~~~~~~~~~~~

pandas/core/index.py

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,9 @@ def __and__(self, other):
866866
def __or__(self, other):
867867
return self.union(other)
868868

869+
def __xor__(self, other):
    """
    Implement ``self ^ other`` by delegating to ``self.sym_diff(other)``.
    """
    symmetric = self.sym_diff(other)
    return symmetric
871+
869872
def union(self, other):
870873
"""
871874
Form the union of two Index objects and sorts if possible
@@ -973,16 +976,20 @@ def diff(self, other):
973976
"""
974977
Compute sorted set difference of two Index objects
975978
979+
Parameters
980+
----------
981+
other : Index or array-like
982+
983+
Returns
984+
-------
985+
diff : Index
986+
976987
Notes
977988
-----
978989
One can do either of these and achieve the same result
979990
980991
>>> index - index2
981992
>>> index.diff(index2)
982-
983-
Returns
984-
-------
985-
diff : Index
986993
"""
987994

988995
if not hasattr(other, '__iter__'):
@@ -1000,6 +1007,49 @@ def diff(self, other):
10001007
theDiff = sorted(set(self) - set(other))
10011008
return Index(theDiff, name=result_name)
10021009

1010+
def sym_diff(self, other, result_name=None):
    """
    Compute the sorted symmetric difference of two Index objects.

    Parameters
    ----------
    other : Index or array-like
        Must be iterable. Anything that is not already an Index is
        converted with ``Index(other)``.
    result_name : str, optional
        Name given to the returned Index. When ``other`` is not an
        Index and no name is supplied, ``self.name`` is used instead.

    Returns
    -------
    sym_diff : Index

    Raises
    ------
    TypeError
        If ``other`` has no ``__iter__`` attribute.

    Notes
    -----
    ``sym_diff`` contains elements that appear in either ``idx1`` or
    ``idx2`` but not both. Equivalent to the Index created by
    ``(idx1 - idx2) + (idx2 - idx1)`` with duplicates dropped.

    Examples
    --------
    >>> idx1 = Index([1, 2, 3, 4])
    >>> idx2 = Index([2, 3, 4, 5])
    >>> idx1.sym_diff(idx2)
    Int64Index([1, 5], dtype='int64')

    You can also use the ``^`` operator:

    >>> idx1 ^ idx2
    Int64Index([1, 5], dtype='int64')
    """
    if not hasattr(other, '__iter__'):
        raise TypeError('Input must be iterable!')

    if isinstance(other, Index):
        rhs = other
    else:
        rhs = Index(other)
        # The name fallback applies only when a non-Index was wrapped.
        result_name = result_name or self.name

    only_in_self = self - rhs
    only_in_other = rhs - self

    # Passing through a set drops duplicates before the final sort.
    members = list(set(only_in_self + only_in_other))
    members.sort()
    return Index(members, name=result_name)
1051+
1052+
10031053
def unique(self):
10041054
"""
10051055
Return array of unique values in the Index. Significantly faster than

pandas/tests/test_index.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,52 @@ def test_diff(self):
471471
# non-iterable input
472472
assertRaisesRegexp(TypeError, "iterable", first.diff, 0.5)
473473

474+
def test_symmetric_diff(self):
    """Exercise ``Index.sym_diff`` and its ``^`` operator spelling."""
    # smoke
    idx1 = Index([1, 2, 3, 4], name='idx1')
    idx2 = Index([2, 3, 4, 5])
    result = idx1.sym_diff(idx2)
    expected = Index([1, 5])
    self.assert_(tm.equalContents(result, expected))
    self.assert_(result.name is None)

    # __xor__ syntax: check the operator's own result (contents and name)
    # rather than re-asserting on the method-call result.
    xor_result = idx1 ^ idx2
    self.assert_(tm.equalContents(xor_result, expected))
    self.assert_(xor_result.name is None)

    # multiIndex
    idx1 = MultiIndex.from_tuples(self.tuples)
    idx2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)])
    result = idx1.sym_diff(idx2)
    expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)])
    self.assert_(tm.equalContents(result, expected))

    # nans:
    idx1 = Index([1, 2, np.nan])
    idx2 = Index([0, 1, np.nan])
    result = idx1.sym_diff(idx2)
    expected = Index([0.0, np.nan, 2.0, np.nan])  # oddness with nans
    nans = pd.isnull(expected)
    self.assert_(pd.isnull(result[nans]).all())
    self.assert_(tm.equalContents(result[~nans], expected[~nans]))

    # other not an Index:
    idx1 = Index([1, 2, 3, 4], name='idx1')
    idx2 = np.array([2, 3, 4, 5])
    expected = Index([1, 5])
    result = idx1.sym_diff(idx2)
    self.assert_(tm.equalContents(result, expected))
    self.assertEquals(result.name, 'idx1')

    result = idx1.sym_diff(idx2, result_name='new_name')
    self.assert_(tm.equalContents(result, expected))
    self.assertEquals(result.name, 'new_name')

    # other isn't iterable: go through sym_diff itself (method and
    # operator); ``idx1 - 1`` would only test ``diff``.
    assertRaisesRegexp(TypeError, "iterable", idx1.sym_diff, 1)
    with tm.assertRaises(TypeError):
        idx1 ^ 1
519+
474520
def test_pickle(self):
475521
def testit(index):
476522
pickled = pickle.dumps(index)

0 commit comments

Comments
 (0)