Skip to content

BUG: fix Index.reindex to preserve name when target is list/ndarray #8460

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1012,3 +1012,4 @@ Bug Fixes
- Bug in masked series assignment where mismatching types would break alignment (:issue:`8387`)
- Bug in NDFrame.equals gives false negatives with dtype=object (:issue:`8437`)
- Bug in assignment with indexer where type diversity would break alignment (:issue:`8258`)
- Bug in ``NDFrame.loc`` indexing when row/column names were lost when target was a list/ndarray (:issue:`6552`)
6 changes: 2 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2260,17 +2260,15 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy):
def _reindex_index(self, new_index, method, copy, level, fill_value=NA,
limit=None):
new_index, indexer = self.index.reindex(new_index, method, level,
limit=limit,
copy_if_needed=True)
limit=limit)
return self._reindex_with_indexers({0: [new_index, indexer]},
copy=copy, fill_value=fill_value,
allow_dups=False)

def _reindex_columns(self, new_columns, copy, level, fill_value=NA,
limit=None):
new_columns, indexer = self.columns.reindex(new_columns, level=level,
limit=limit,
copy_if_needed=True)
limit=limit)
return self._reindex_with_indexers({1: [new_columns, indexer]},
copy=copy, fill_value=fill_value,
allow_dups=False)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1776,8 +1776,8 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
axis_name = self._get_axis_name(axis)
axis_values = self._get_axis(axis_name)
method = com._clean_fill_method(method)
new_index, indexer = axis_values.reindex(
labels, method, level, limit=limit, copy_if_needed=True)
new_index, indexer = axis_values.reindex(labels, method, level,
limit=limit)
return self._reindex_with_indexers(
{axis: [new_index, indexer]}, method=method, fill_value=fill_value,
limit=limit, copy=copy)
Expand Down
50 changes: 30 additions & 20 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1578,34 +1578,31 @@ def _get_method(self, method):
}
return aliases.get(method, method)

def reindex(self, target, method=None, level=None, limit=None,
copy_if_needed=False):
def reindex(self, target, method=None, level=None, limit=None):
"""
For Index, simply returns the new index and the results of
get_indexer. Provided here to enable an interface that is amenable for
subclasses of Index whose internals are different (like MultiIndex)
Create index with target's values (move/add/delete values as necessary)

Returns
-------
(new_index, indexer, mask) : tuple
new_index : pd.Index
Resulting index
indexer : np.ndarray or None
Indices of output values in original index

"""
# GH6552: preserve names when reindexing to non-named target
# (i.e. neither Index nor Series).
preserve_names = not hasattr(target, 'name')

target = _ensure_index(target)
if level is not None:
if method is not None:
raise TypeError('Fill method not supported if level passed')
_, indexer, _ = self._join_level(target, level, how='right',
return_indexers=True)
else:

if self.equals(target):
indexer = None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 this was so hacky before


# to avoid aliasing an existing index
if (copy_if_needed and target.name != self.name and
self.name is not None):
if target.name is None:
target = self.copy()

else:
if self.is_unique:
indexer = self.get_indexer(target, method=method,
Expand All @@ -1616,6 +1613,10 @@ def reindex(self, target, method=None, level=None, limit=None,
"with a method or limit")
indexer, missing = self.get_indexer_non_unique(target)

if preserve_names and target.nlevels == 1 and target.name != self.name:
target = target.copy()
target.name = self.name

return target, indexer

def join(self, other, how='left', level=None, return_indexers=False):
Expand Down Expand Up @@ -3686,17 +3687,21 @@ def get_indexer(self, target, method=None, limit=None):

return com._ensure_platform_int(indexer)

def reindex(self, target, method=None, level=None, limit=None,
copy_if_needed=False):
def reindex(self, target, method=None, level=None, limit=None):
"""
Performs any necessary conversion on the input index and calls
get_indexer. This method is here so MultiIndex and an Index of
like-labeled tuples can play nice together
Create index with target's values (move/add/delete values as necessary)

Returns
-------
(new_index, indexer, mask) : (MultiIndex, ndarray, ndarray)
new_index : pd.MultiIndex
Resulting index
indexer : np.ndarray or None
Indices of output values in original index

"""
# GH6552: preserve names when reindexing to non-named target
# (i.e. neither Index nor Series).
preserve_names = not hasattr(target, 'names')

if level is not None:
if method is not None:
Expand Down Expand Up @@ -3724,6 +3729,11 @@ def reindex(self, target, method=None, level=None, limit=None,
# hopefully?
target = MultiIndex.from_tuples(target)

if (preserve_names and target.nlevels == self.nlevels and
target.names != self.names):
target = target.copy(deep=False)
target.names = self.names

return target, indexer

@cache_readonly
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3088,7 +3088,7 @@ def reindex_axis(self, new_index, axis, method=None, limit=None,
"""
new_index = _ensure_index(new_index)
new_index, indexer = self.axes[axis].reindex(
new_index, method=method, limit=limit, copy_if_needed=True)
new_index, method=method, limit=limit)

return self.reindex_indexer(new_index, indexer, axis=axis,
fill_value=fill_value, copy=copy)
Expand Down
54 changes: 54 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1019,6 +1019,36 @@ def test_nan_first_take_datetime(self):
exp = Index([idx[-1], idx[0], idx[1]])
tm.assert_index_equal(res, exp)

def test_reindex_preserves_name_if_target_is_list_or_ndarray(self):
# GH6552
idx = pd.Index([0, 1, 2])

dt_idx = pd.date_range('20130101', periods=3)

idx.name = None
self.assertEqual(idx.reindex([])[0].name, None)
self.assertEqual(idx.reindex(np.array([]))[0].name, None)
self.assertEqual(idx.reindex(idx.tolist())[0].name, None)
self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, None)
self.assertEqual(idx.reindex(idx.values)[0].name, None)
self.assertEqual(idx.reindex(idx.values[:-1])[0].name, None)

# Must preserve name even if dtype changes.
self.assertEqual(idx.reindex(dt_idx.values)[0].name, None)
self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, None)

idx.name = 'foobar'
self.assertEqual(idx.reindex([])[0].name, 'foobar')
self.assertEqual(idx.reindex(np.array([]))[0].name, 'foobar')
self.assertEqual(idx.reindex(idx.tolist())[0].name, 'foobar')
self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, 'foobar')
self.assertEqual(idx.reindex(idx.values)[0].name, 'foobar')
self.assertEqual(idx.reindex(idx.values[:-1])[0].name, 'foobar')

# Must preserve name even if dtype changes.
self.assertEqual(idx.reindex(dt_idx.values)[0].name, 'foobar')
self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, 'foobar')


class Numeric(Base):

Expand Down Expand Up @@ -3267,6 +3297,30 @@ def test_isin_level_kwarg(self):

self.assertRaises(KeyError, idx.isin, vals_1, level='C')

def test_reindex_preserves_names_when_target_is_list_or_ndarray(self):
# GH6552
idx = self.index.copy()
target = idx.copy()
idx.names = target.names = [None, None]

other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]])

# list & ndarray cases
self.assertEqual(idx.reindex([])[0].names, [None, None])
self.assertEqual(idx.reindex(np.array([]))[0].names, [None, None])
self.assertEqual(idx.reindex(target.tolist())[0].names, [None, None])
self.assertEqual(idx.reindex(target.values)[0].names, [None, None])
self.assertEqual(idx.reindex(other_dtype.tolist())[0].names, [None, None])
self.assertEqual(idx.reindex(other_dtype.values)[0].names, [None, None])

idx.names = ['foo', 'bar']
self.assertEqual(idx.reindex([])[0].names, ['foo', 'bar'])
self.assertEqual(idx.reindex(np.array([]))[0].names, ['foo', 'bar'])
self.assertEqual(idx.reindex(target.tolist())[0].names, ['foo', 'bar'])
self.assertEqual(idx.reindex(target.values)[0].names, ['foo', 'bar'])
self.assertEqual(idx.reindex(other_dtype.tolist())[0].names, ['foo', 'bar'])
self.assertEqual(idx.reindex(other_dtype.values)[0].names, ['foo', 'bar'])


def test_get_combined_index():
from pandas.core.index import _get_combined_index
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3836,17 +3836,14 @@ def test_iloc_empty_list_indexer_is_ok(self):
assert_frame_equal(df.iloc[[],:], df.iloc[:0, :]) # horizontal empty
assert_frame_equal(df.iloc[[]], df.iloc[:0, :]) # horizontal empty

# FIXME: fix loc & xs
def test_loc_empty_list_indexer_is_ok(self):
raise nose.SkipTest('loc discards columns names')
from pandas.util.testing import makeCustomDataframe as mkdf
df = mkdf(5, 2)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

haha, didn't even see this!

assert_frame_equal(df.loc[:,[]], df.iloc[:, :0]) # vertical empty
assert_frame_equal(df.loc[[],:], df.iloc[:0, :]) # horizontal empty
assert_frame_equal(df.loc[[]], df.iloc[:0, :]) # horizontal empty

def test_ix_empty_list_indexer_is_ok(self):
raise nose.SkipTest('ix discards columns names')
from pandas.util.testing import makeCustomDataframe as mkdf
df = mkdf(5, 2)
assert_frame_equal(df.ix[:,[]], df.iloc[:, :0]) # vertical empty
Expand Down