pandas-dev · gfyoung · Nov 6, 2018 · Feb 22, 2018 · Mar 1, 2018 · Mar 5, 2018
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -837,6 +837,8 @@ Indexing
 - Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`)
 - Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`)
 - Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (issue:`19726`)
+- Bug in :func:`Index.union` and :func:`Index.intersection` where the name of the resulting ``Index`` was not computed correctly in certain cases (:issue:`9943`, :issue:`9862`)
+- Bug in :func:`Index.difference` where taking the difference of an ``Index`` with itself returned a result that did not preserve the ``Index`` type
 
 
 MultiIndex

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -57,7 +57,7 @@
 import pandas.core.algorithms as algos
 import pandas.core.sorting as sorting
 from pandas.io.formats.printing import pprint_thing
-from pandas.core.ops import make_invalid_op
+from pandas.core.ops import make_invalid_op, get_op_result_name
 from pandas.core.config import get_option
 from pandas.core.strings import StringMethods
 
@@ -1191,7 +1191,7 @@ def _convert_can_do_setop(self, other):
             other = Index(other, name=self.name)
             result_name = self.name
         else:
-            result_name = self.name if self.name == other.name else None
+            result_name = get_op_result_name(self, other)
         return other, result_name
 
     def _convert_for_op(self, value):
@@ -1240,9 +1240,9 @@ def set_names(self, names, level=None, inplace=False):
         Examples
         --------
         >>> Index([1, 2, 3, 4]).set_names('foo')
-        Int64Index([1, 2, 3, 4], dtype='int64')
+        Int64Index([1, 2, 3, 4], dtype='int64', name='foo')
         >>> Index([1, 2, 3, 4]).set_names(['foo'])
-        Int64Index([1, 2, 3, 4], dtype='int64')
+        Int64Index([1, 2, 3, 4], dtype='int64', name='foo')
         >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
                                           (2, u'one'), (2, u'two')],
                                           names=['foo', 'bar'])
@@ -2263,21 +2263,37 @@ def __or__(self, other):
     def __xor__(self, other):
         return self.symmetric_difference(other)
 
-    def _get_consensus_name(self, other):
+    def _get_reconciled_name_object(self, other):
         """
-        Given 2 indexes, give a consensus name meaning
-        we take the not None one, or None if the names differ.
-        Return a new object if we are resetting the name
+        If the result of a set operation will be self,
+        return self, unless the name changes, in which
+        case make a shallow copy of self.
         """
-        if self.name != other.name:
-            if self.name is None or other.name is None:
-                name = self.name or other.name
-            else:
-                name = None
-            if self.name != name:
-                return self._shallow_copy(name=name)
+        name = get_op_result_name(self, other)
+        if self.name != name:
+            return self._shallow_copy(name=name)
         return self
 
+    def _union_corner_case(self, other):
+        """
+        If other is empty or equals self, the union is self; if self
+        is empty, it is other.  Names are reconciled on the result.
+
+        Returns
+        -------
+        Tuple of (is_corner, result), where is_corner is True if
+        it is a corner case, and result is the reconciled result
+
+        """
+        # GH 9943 9862
+        if len(other) == 0 or self.equals(other):
+            return (True, self._get_reconciled_name_object(other))
+
+        if len(self) == 0:
+            return (True, other._get_reconciled_name_object(self))
+
+        return (False, None)
+
     def union(self, other):
         """
         Form the union of two Index objects and sorts if possible.
@@ -2302,11 +2318,9 @@ def union(self, other):
         self._assert_can_do_setop(other)
         other = _ensure_index(other)
 
-        if len(other) == 0 or self.equals(other):
-            return self._get_consensus_name(other)
-
-        if len(self) == 0:
-            return other._get_consensus_name(self)
+        is_corner_case, corner_result = self._union_corner_case(other)
+        if is_corner_case:
+            return corner_result
 
         # TODO: is_dtype_union_equal is a hack around
         # 1. buggy set ops with duplicates (GH #13432)
@@ -2369,11 +2383,10 @@ def union(self, other):
                                   stacklevel=3)
 
         # for subclasses
-        return self._wrap_union_result(other, result)
+        return self._wrap_setop_result(other, result)
 
-    def _wrap_union_result(self, other, result):
-        name = self.name if self.name == other.name else None
-        return self.__class__(result, name=name)
+    def _wrap_setop_result(self, other, result):
+        return self.__class__(result, name=get_op_result_name(self, other))
 
     def intersection(self, other):
         """
@@ -2403,7 +2416,7 @@ def intersection(self, other):
         other = _ensure_index(other)
 
         if self.equals(other):
-            return self._get_consensus_name(other)
+            return self._get_reconciled_name_object(other)
 
         if not is_dtype_equal(self.dtype, other.dtype):
             this = self.astype('O')
@@ -2423,7 +2436,7 @@ def intersection(self, other):
         if self.is_monotonic and other.is_monotonic:
             try:
                 result = self._inner_indexer(lvals, rvals)[0]
-                return self._wrap_union_result(other, result)
+                return self._wrap_setop_result(other, result)
             except TypeError:
                 pass
 
@@ -2441,6 +2454,13 @@ def intersection(self, other):
             taken.name = None
         return taken
 
+    def _create_empty_index(self, name):
+        """
+        Returns an empty index.  Overridden as necessary by
+        subclasses that have different constructors.
+        """
+        return self.__class__([], name=name)
+
     def difference(self, other):
         """
         Return a new Index with elements from the index that are not in
@@ -2469,7 +2489,7 @@ def difference(self, other):
         self._assert_can_do_setop(other)
 
         if self.equals(other):
-            return Index([], name=self.name)
+            return self._create_empty_index(get_op_result_name(self, other))
 
         other, result_name = self._convert_can_do_setop(other)
 
@@ -3552,7 +3572,7 @@ def _join_monotonic(self, other, how='left', return_indexers=False):
             return join_index
 
     def _wrap_joined_index(self, joined, other):
-        name = self.name if self.name == other.name else None
+        name = get_op_result_name(self, other)
         return Index(joined, name=name)
 
     def _get_string_slice(self, key, use_lhs=True, use_rhs=True):

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -25,6 +25,7 @@
 import pandas.core.base as base
 import pandas.core.missing as missing
 import pandas.core.indexes.base as ibase
+from pandas.core.ops import get_op_result_name
 
 _index_doc_kwargs = dict(ibase._index_doc_kwargs)
 _index_doc_kwargs.update(dict(target_klass='CategoricalIndex'))
@@ -300,6 +301,12 @@ def itemsize(self):
         # Size of the items in categories, not codes.
         return self.values.itemsize
 
+    def _wrap_setop_result(self, other, result):
+        name = get_op_result_name(self, other)
+        return self._simple_new(result, name=name,
+                                categories=self.categories,
+                                ordered=self.ordered)
+
     def get_values(self):
         """ return the underlying data as an ndarray """
         return self._data.get_values()
@@ -716,6 +723,13 @@ def insert(self, loc, item):
         codes = np.concatenate((codes[:loc], code, codes[loc:]))
         return self._create_from_codes(codes)
 
+    def _create_empty_index(self, name):
+        """
+        Returns an empty index using categories and ordered of self
+        """
+        return CategoricalIndex([], categories=self.categories,
+                                ordered=self.ordered, name=name)
+
     def _concat(self, to_concat, name):
         # if calling index is category, don't check dtype of others
         return CategoricalIndex._concat_same_dtype(self, to_concat, name)

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -38,6 +38,7 @@
 
 from pandas.core.indexes.base import Index, _index_shared_docs
 from pandas.core.indexes.numeric import Int64Index, Float64Index
+from pandas.core.ops import get_op_result_name
 import pandas.compat as compat
 from pandas.tseries.frequencies import to_offset, get_period_alias, Resolution
 from pandas.core.indexes.datetimelike import (
@@ -1136,6 +1137,11 @@ def union(self, other):
         y : Index or DatetimeIndex
         """
         self._assert_can_do_setop(other)
+
+        is_corner_case, corner_result = self._union_corner_case(other)
+        if is_corner_case:
+            return corner_result
+
         if not isinstance(other, DatetimeIndex):
             try:
                 other = DatetimeIndex(other)
@@ -1237,7 +1243,7 @@ def _maybe_utc_convert(self, other):
         return this, other
 
     def _wrap_joined_index(self, joined, other):
-        name = self.name if self.name == other.name else None
+        name = get_op_result_name(self, other)
         if (isinstance(other, DatetimeIndex) and
                 self.offset == other.offset and
                 self._can_fast_union(other)):
@@ -1333,8 +1339,8 @@ def __iter__(self):
                                                  box="timestamp")
             return iter(converted)
 
-    def _wrap_union_result(self, other, result):
-        name = self.name if self.name == other.name else None
+    def _wrap_setop_result(self, other, result):
+        name = get_op_result_name(self, other)
         if not timezones.tz_compare(self.tz, other.tz):
             raise ValueError('Passed item and index have different timezone')
         return self._simple_new(result, name=name, freq=None, tz=self.tz)
@@ -1353,6 +1359,10 @@ def intersection(self, other):
         y : Index or DatetimeIndex
         """
         self._assert_can_do_setop(other)
+
+        if self.equals(other):
+            return self._get_reconciled_name_object(other)
+
         if not isinstance(other, DatetimeIndex):
             try:
                 other = DatetimeIndex(other)

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -26,6 +26,7 @@
 from pandas.core.indexes.base import (
     Index, _ensure_index,
     default_pprint, _index_shared_docs)
+from pandas.core.ops import get_op_result_name
 
 from pandas._libs import Timestamp, Timedelta
 from pandas._libs.interval import (
@@ -1351,7 +1352,7 @@ def func(self, other):
                 raise TypeError(msg.format(op=op_name))
 
             result = getattr(self._multiindex, op_name)(other._multiindex)
-            result_name = self.name if self.name == other.name else None
+            result_name = get_op_result_name(self, other)
 
             # GH 19101: ensure empty results have correct dtype
             if result.empty:

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -2733,7 +2733,7 @@ def intersection(self, other):
         other_tuples = other._ndarray_values
         uniq_tuples = sorted(set(self_tuples) & set(other_tuples))
         if len(uniq_tuples) == 0:
-            return MultiIndex(levels=[[]] * self.nlevels,
+            return MultiIndex(levels=self.levels,
                               labels=[[]] * self.nlevels,
                               names=result_names, verify_integrity=False)
         else:
@@ -2755,7 +2755,7 @@ def difference(self, other):
             return self
 
         if self.equals(other):
-            return MultiIndex(levels=[[]] * self.nlevels,
+            return MultiIndex(levels=self.levels,
                               labels=[[]] * self.nlevels,
                               names=result_names, verify_integrity=False)
 

diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
@@ -18,7 +18,7 @@
 from pandas.util._decorators import Appender, cache_readonly
 import pandas.core.dtypes.concat as _concat
 import pandas.core.indexes.base as ibase
-
+from pandas.core.ops import get_op_result_name
 
 _num_index_shared_docs = dict()
 
@@ -187,7 +187,7 @@ def _convert_scalar_indexer(self, key, kind=None):
                 ._convert_scalar_indexer(key, kind=kind))
 
     def _wrap_joined_index(self, joined, other):
-        name = self.name if self.name == other.name else None
+        name = get_op_result_name(self, other)
         return Int64Index(joined, name=name)
 
     @classmethod
@@ -264,7 +264,7 @@ def _convert_index_indexer(self, keyarr):
         return keyarr
 
     def _wrap_joined_index(self, joined, other):
-        name = self.name if self.name == other.name else None
+        name = get_op_result_name(self, other)
         return UInt64Index(joined, name=name)
 
     @classmethod

diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
@@ -19,6 +19,7 @@
     _ensure_object)
 from pandas.core.dtypes.dtypes import PeriodDtype
 from pandas.core.dtypes.generic import ABCSeries
+from pandas.core.ops import get_op_result_name
 
 import pandas.tseries.frequencies as frequencies
 from pandas.tseries.frequencies import get_freq_code as _gfc
@@ -962,6 +963,12 @@ def _convert_tolerance(self, tolerance, target):
                              'target index size')
         return self._maybe_convert_timedelta(tolerance)
 
+    def _create_empty_index(self, name):
+        """
+        Returns an empty index using freq of self
+        """
+        return PeriodIndex([], freq=self.freq, name=name)
+
     def insert(self, loc, item):
         if not isinstance(item, Period) or self.freq != item.freq:
             return self.astype(object).insert(loc, item)
@@ -996,8 +1003,8 @@ def _assert_can_do_setop(self, other):
             msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
             raise IncompatibleFrequency(msg)
 
-    def _wrap_union_result(self, other, result):
-        name = self.name if self.name == other.name else None
+    def _wrap_setop_result(self, other, result):
+        name = get_op_result_name(self, other)
         result = self._apply_meta(result)
         result.name = name
         return result

diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
@@ -256,7 +256,8 @@ def tolist(self):
     @Appender(_index_shared_docs['_shallow_copy'])
     def _shallow_copy(self, values=None, **kwargs):
         if values is None:
-            return RangeIndex(name=self.name, fastpath=True,
+            name = kwargs.get("name", self.name)
+            return RangeIndex(name=name, fastpath=True,
                               **dict(self._get_data_as_items()))
         else:
             kwargs.setdefault('name', self.name)
@@ -337,6 +338,10 @@ def intersection(self, other):
         -------
         intersection : Index
         """
+
+        if self.equals(other):
+            return self._get_reconciled_name_object(other)
+
         if not isinstance(other, RangeIndex):
             return super(RangeIndex, self).intersection(other)
 
@@ -417,10 +422,10 @@ def union(self, other):
         union : Index
         """
         self._assert_can_do_setop(other)
-        if len(other) == 0 or self.equals(other):
-            return self
-        if len(self) == 0:
-            return other
+        is_corner_case, corner_result = self._union_corner_case(other)
+        if is_corner_case:
+            return corner_result
+
         if isinstance(other, RangeIndex):
             start_s, step_s = self._start, self._step
             end_s = self._start + self._step * (len(self) - 1)
@@ -474,6 +479,12 @@ def join(self, other, how='left', level=None, return_indexers=False,
     def _concat_same_dtype(self, indexes, name):
         return _concat._concat_rangeindex_same_dtype(indexes).rename(name)
 
+    def _create_empty_index(self, name):
+        """
+        Returns an empty index using step size of self
+        """
+        return RangeIndex(start=None, stop=None, step=self._step, name=name)
+
     def __len__(self):
         """
         return the length of the RangeIndex