Skip to content

Commit fada6f0

Browse files
committed
Add optional sort parameter to difference method in subclasses
1 parent 382ddd4 commit fada6f0

File tree

3 files changed

+34
-13
lines changed

3 files changed

+34
-13
lines changed

doc/source/whatsnew/v0.24.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ Other Enhancements
238238
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)
239239
- :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`)
240240
- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`)
241+
- :meth:`Index.difference` now has an optional ``sort`` parameter to specify whether the results should be sorted if possible (:issue:`17839`)
241242

242243
.. _whatsnew_0240.api_breaking:
243244

pandas/core/indexes/multi.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2769,10 +2769,18 @@ def intersection(self, other):
27692769
return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
27702770
names=result_names)
27712771

2772-
def difference(self, other):
2772+
def difference(self, other, sort=True):
27732773
"""
27742774
Compute set difference of two MultiIndex objects (sorted by default)
27752775
2776+
Parameters
2777+
----------
2778+
other : MultiIndex
2779+
sort : bool, default True
2780+
Sort the resulting MultiIndex if possible
2781+
2782+
.. versionadded:: 0.24.0
2783+
27762784
Returns
27772785
-------
27782786
diff : MultiIndex
@@ -2788,8 +2796,10 @@ def difference(self, other):
27882796
labels=[[]] * self.nlevels,
27892797
names=result_names, verify_integrity=False)
27902798

2791-
difference = sorted(set(self._ndarray_values) -
2792-
set(other._ndarray_values))
2799+
difference = set(self._ndarray_values) - set(other._ndarray_values)
2800+
2801+
if sort:
2802+
difference = sorted(difference)
27932803

27942804
if len(difference) == 0:
27952805
return MultiIndex(levels=[[]] * self.nlevels,

pandas/tests/indexes/test_base.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,15 +1105,17 @@ def test_iadd_string(self):
11051105

11061106
@pytest.mark.parametrize("second_name,expected", [
11071107
(None, None), ('name', 'name')])
1108-
def test_difference_name_preservation(self, second_name, expected):
1108+
@pytest.mark.parametrize("sort", [
1109+
(True, False)])
1110+
def test_difference_name_preservation(self, second_name, expected, sort):
11091111
# TODO: replace with fixture
11101112
first = self.strIndex[5:20]
11111113
second = self.strIndex[:10]
11121114
answer = self.strIndex[10:20]
11131115

11141116
first.name = 'name'
11151117
second.name = second_name
1116-
result = first.difference(second)
1118+
result = first.difference(second, sort)
11171119

11181120
assert tm.equalContents(result, answer)
11191121

@@ -1122,18 +1124,22 @@ def test_difference_name_preservation(self, second_name, expected):
11221124
else:
11231125
assert result.name == expected
11241126

1125-
def test_difference_empty_arg(self):
1127+
@pytest.mark.parametrize("sort", [
1128+
(True, False)])
1129+
def test_difference_empty_arg(self, sort):
11261130
first = self.strIndex[5:20]
11271131
first.name == 'name'
1128-
result = first.difference([])
1132+
result = first.difference([], sort=sort)
11291133

11301134
assert tm.equalContents(result, first)
11311135
assert result.name == first.name
11321136

1133-
def test_difference_identity(self):
1137+
@pytest.mark.parametrize("sort", [
1138+
(True, False)])
1139+
def test_difference_identity(self, sort):
11341140
first = self.strIndex[5:20]
11351141
first.name == 'name'
1136-
result = first.difference(first)
1142+
result = first.difference(first, sort)
11371143

11381144
assert len(result) == 0
11391145
assert result.name == first.name
@@ -1182,13 +1188,15 @@ def test_symmetric_difference_non_index(self):
11821188
assert tm.equalContents(result, expected)
11831189
assert result.name == 'new_name'
11841190

1185-
def test_difference_type(self):
1191+
@pytest.mark.parametrize("sort", [
1192+
(True, False)])
1193+
def test_difference_type(self, sort):
11861194
# GH 20040
11871195
# If taking difference of a set and itself, it
11881196
# needs to preserve the type of the index
11891197
skip_index_keys = ['repeats']
11901198
for key, index in self.generate_index_types(skip_index_keys):
1191-
result = index.difference(index)
1199+
result = index.difference(index, sort)
11921200
expected = index.drop(index)
11931201
tm.assert_index_equal(result, expected)
11941202

@@ -2402,13 +2410,15 @@ def test_intersection_different_type_base(self, klass):
24022410
result = first.intersection(klass(second.values))
24032411
assert tm.equalContents(result, second)
24042412

2405-
def test_difference_base(self):
2413+
@pytest.mark.parametrize("sort", [
2414+
(True, False)])
2415+
def test_difference_base(self, sort):
24062416
# (same results for py2 and py3 but sortedness not tested elsewhere)
24072417
index = self.create_index()
24082418
first = index[:4]
24092419
second = index[3:]
24102420

2411-
result = first.difference(second)
2421+
result = first.difference(second, sort)
24122422
expected = Index([0, 1, 'a'])
24132423
tm.assert_index_equal(result, expected)
24142424

0 commit comments

Comments
 (0)