Skip to content

REF: share Index.union #38671

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 55 additions & 6 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2656,12 +2656,59 @@ def union(self, other, sort=None):
>>> idx2 = pd.Index([1, 2, 3, 4])
>>> idx1.union(idx2)
Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')

MultiIndex case

>>> idx1 = pd.MultiIndex.from_arrays(
... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]]
... )
>>> idx1
MultiIndex([(1, 'Red'),
(1, 'Blue'),
(2, 'Red'),
(2, 'Blue')],
)
>>> idx2 = pd.MultiIndex.from_arrays(
... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]]
... )
>>> idx2
MultiIndex([(3, 'Red'),
(3, 'Green'),
(2, 'Red'),
(2, 'Green')],
)
>>> idx1.union(idx2)
MultiIndex([(1, 'Blue'),
(1, 'Red'),
(2, 'Blue'),
(2, 'Green'),
(2, 'Red'),
(3, 'Green'),
(3, 'Red')],
)
>>> idx1.union(idx2, sort=False)
MultiIndex([(1, 'Red'),
(1, 'Blue'),
(2, 'Red'),
(2, 'Blue'),
(3, 'Red'),
(3, 'Green'),
(2, 'Green')],
)
"""
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)
other, result_name = self._convert_can_do_setop(other)

if not is_dtype_equal(self.dtype, other.dtype):
if isinstance(self, ABCMultiIndex) and not is_object_dtype(
unpack_nested_dtype(other)
):
raise NotImplementedError(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this branch hit in the tests?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

"Can only union MultiIndex with MultiIndex or Index of tuples, "
"try mi.to_flat_index().union(other) instead."
)

dtype = find_common_type([self.dtype, other.dtype])
if self._is_numeric_dtype and other._is_numeric_dtype:
# Right now, we treat union(int, float) a bit special.
Expand All @@ -2680,6 +2727,14 @@ def union(self, other, sort=None):
right = other.astype(dtype, copy=False)
return left.union(right, sort=sort)

elif not len(other) or self.equals(other):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there test coverage for all of these branches?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

# NB: whether this (and the `if not len(self)` check below) come before
# or after the is_dtype_equal check above affects the returned dtype
return self._get_reconciled_name_object(other)

elif not len(self):
return other._get_reconciled_name_object(self)

result = self._union(other, sort=sort)

return self._wrap_setop_result(other, result)
Expand All @@ -2703,12 +2758,6 @@ def _union(self, other, sort):
-------
Index
"""
if not len(other) or self.equals(other):
return self

if not len(self):
return other

# TODO(EA): setops-refactor, clean all this up
lvals = self._values
rvals = other._values
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,9 @@ def _can_fast_intersect(self: _T, other: _T) -> bool:
# so intersection will preserve freq
return True

elif not len(self) or not len(other):
return False

elif isinstance(self.freq, Tick):
# We "line up" if and only if the difference between two of our points
# is a multiple of our freq
Expand Down Expand Up @@ -794,9 +797,6 @@ def _fast_union(self, other, sort=None):
return left

def _union(self, other, sort):
if not len(other) or self.equals(other) or not len(self):
return super()._union(other, sort=sort)

# We are called by `union`, which is responsible for this validation
assert isinstance(other, type(self))

Expand Down
89 changes: 0 additions & 89 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3502,98 +3502,9 @@ def equal_levels(self, other) -> bool:
# --------------------------------------------------------------------
# Set Methods

def union(self, other, sort=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add these examples to the existing Index.union docstring?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Restored the examples in the Index.union docstring; CI is mostly green.

"""
Form the union of two MultiIndex objects

Parameters
----------
other : MultiIndex or array / Index of tuples
sort : False or None, default None
Whether to sort the resulting Index.

* None : Sort the result, except when

1. `self` and `other` are equal.
2. `self` has length 0.
3. Some values in `self` or `other` cannot be compared.
A RuntimeWarning is issued in this case.

* False : do not sort the result.

.. versionadded:: 0.24.0

.. versionchanged:: 0.24.1

Changed the default value from ``True`` to ``None``
(without change in behaviour).

Returns
-------
Index

Examples
--------
>>> idx1 = pd.MultiIndex.from_arrays(
... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]]
... )
>>> idx1
MultiIndex([(1, 'Red'),
(1, 'Blue'),
(2, 'Red'),
(2, 'Blue')],
)
>>> idx2 = pd.MultiIndex.from_arrays(
... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]]
... )
>>> idx2
MultiIndex([(3, 'Red'),
(3, 'Green'),
(2, 'Red'),
(2, 'Green')],
)

>>> idx1.union(idx2)
MultiIndex([(1, 'Blue'),
(1, 'Red'),
(2, 'Blue'),
(2, 'Green'),
(2, 'Red'),
(3, 'Green'),
(3, 'Red')],
)

>>> idx1.union(idx2, sort=False)
MultiIndex([(1, 'Red'),
(1, 'Blue'),
(2, 'Red'),
(2, 'Blue'),
(3, 'Red'),
(3, 'Green'),
(2, 'Green')],
)
"""
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)
other, _ = self._convert_can_do_setop(other)

if not len(other) or self.equals(other):
return self._get_reconciled_name_object(other)

if not len(self):
return other._get_reconciled_name_object(self)

return self._union(other, sort=sort)

def _union(self, other, sort):
other, result_names = self._convert_can_do_setop(other)

if not self._should_compare(other):
raise NotImplementedError(
"Can only union MultiIndex with MultiIndex or Index of tuples, "
"try mi.to_flat_index().union(other) instead."
)

# We could get here with CategoricalIndex other
rvals = other._values.astype(object, copy=False)
uniq_tuples = lib.fast_unique_multiple([self._values, rvals], sort=sort)
Expand Down
11 changes: 0 additions & 11 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,17 +646,6 @@ def _difference(self, other, sort):
return self._setop(other, sort, opname="difference")

def _union(self, other, sort):
if not len(other) or self.equals(other) or not len(self):
return super()._union(other, sort=sort)

# We are called by `union`, which is responsible for this validation
assert isinstance(other, type(self))

if not is_dtype_equal(self.dtype, other.dtype):
this = self.astype("O")
other = other.astype("O")
return this._union(other, sort=sort)

return self._setop(other, sort, opname="_union")

# ------------------------------------------------------------------------
Expand Down
3 changes: 0 additions & 3 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,9 +576,6 @@ def _union(self, other, sort):
-------
union : Index
"""
if not len(other) or self.equals(other) or not len(self):
return super()._union(other, sort=sort)

if isinstance(other, RangeIndex) and sort is None:
start_s, step_s = self.start, self.step
end_s = self.start + self.step * (len(self) - 1)
Expand Down