Skip to content

Dataframe non unique column renaming #3687

Closed
@hayd

Description

@hayd

If you try to change the column names of a DataFrame with non-unique column names, it seems to throw an error (11.0 and dev).

In [1]: df = pd.DataFrame(np.random.randn(3, 2), columns=['A', 'A'])

In [2]: df.columns = range(2)
---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
<ipython-input-2-454b6d12f4bf> in <module>()
----> 1 df.columns = range(2)

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/frame.pyc in __setattr__(self, name, value)
   2016                 existing = getattr(self, name)
   2017                 if isinstance(existing, Index):
-> 2018                     super(DataFrame, self).__setattr__(name, value)
   2019                 elif name in self.columns:
   2020                     self[name] = value

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/lib.so in pandas.lib.AxisProperty.__set__ (pandas/lib.c:28448)()

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/generic.pyc in _set_axis(self, axis, labels)
    557
    558     def _set_axis(self, axis, labels):
--> 559         self._data.set_axis(axis, labels)
    560         self._clear_item_cache()
    561

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/internals.pyc in set_axis(self, axis, value)
    920         if axis == 0:
    921             for block in self.blocks:
--> 922                 block.set_ref_items(self.items, maybe_rename=True)
    923
    924     # make items read only for now

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/internals.pyc in set_ref_items(self, ref_items, maybe_rename)
     72             raise AssertionError('block ref_items must be an Index')
     73         if maybe_rename:
---> 74             self.items = ref_items.take(self.ref_locs)
     75         self.ref_items = ref_items
     76

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/internals.pyc in ref_locs(self)
     57     def ref_locs(self):
     58         if self._ref_locs is None:
---> 59             indexer = self.ref_items.get_indexer(self.items)
     60             indexer = com._ensure_platform_int(indexer)
     61             if (indexer == -1).any():

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/index.pyc in get_indexer(self, target, method, limit)
    847
    848         if not self.is_unique:
--> 849             raise Exception('Reindexing only valid with uniquely valued Index '
    850                             'objects')
    851

Exception: Reindexing only valid with uniquely valued Index objects

The similar operation on the index (assigning a new index) does not raise.

From this SO question.

What makes this problem a little more annoying is that once you've hit this error, df is left in a broken state and can no longer be used (even displaying it fails):

In [5]: df
Out[5]: ---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-5-7ed0097d7e9e> in <module>()
----> 1 df

/Library/Python/2.7/site-packages/IPython/core/displayhook.pyc in __call__(self, result)
    236             self.start_displayhook()
    237             self.write_output_prompt()
--> 238             format_dict = self.compute_format_data(result)
    239             self.write_format_data(format_dict)
    240             self.update_user_ns(result)

/Library/Python/2.7/site-packages/IPython/core/displayhook.pyc in compute_format_data(self, result)
    148             MIME type representation of the object.
    149         """
--> 150         return self.shell.display_formatter.format(result)
    151
    152     def write_format_data(self, format_dict):

/Library/Python/2.7/site-packages/IPython/core/formatters.pyc in format(self, obj, include, exclude)
    124                     continue
    125             try:
--> 126                 data = formatter(obj)
    127             except:
    128                 # FIXME: log the exception

/Library/Python/2.7/site-packages/IPython/core/formatters.pyc in __call__(self, obj)
    445                 type_pprinters=self.type_printers,
    446                 deferred_pprinters=self.deferred_printers)
--> 447             printer.pretty(obj)
    448             printer.flush()
    449             return stream.getvalue()

/Library/Python/2.7/site-packages/IPython/lib/pretty.pyc in pretty(self, obj)
    358                             if callable(meth):
    359                                 return meth(obj, self, cycle)
--> 360             return _default_pprint(obj, self, cycle)
    361         finally:
    362             self.end_group()

/Library/Python/2.7/site-packages/IPython/lib/pretty.pyc in _default_pprint(obj, p, cycle)
    478     if getattr(klass, '__repr__', None) not in _baseclass_reprs:
    479         # A user-provided repr.
--> 480         p.text(repr(obj))
    481         return
    482     p.begin_group(1, '<')

/Users/234BroadWalk/pandas/pandas/core/frame.pyc in __repr__(self)
    725         Yields Bytestring in Py2, Unicode String in py3.
    726         """
--> 727         return str(self)
    728
    729     def _repr_html_(self):

/Users/234BroadWalk/pandas/pandas/core/frame.pyc in __str__(self)
    668         if py3compat.PY3:
    669             return self.__unicode__()
--> 670         return self.__bytes__()
    671
    672     def __bytes__(self):

/Users/234BroadWalk/pandas/pandas/core/frame.pyc in __bytes__(self)
    678         """
    679         encoding = com.get_option("display.encoding")
--> 680         return self.__unicode__().encode(encoding, 'replace')
    681
    682     def __unicode__(self):

/Users/234BroadWalk/pandas/pandas/core/frame.pyc in __unicode__(self)
    693             # This needs to compute the entire repr
    694             # so don't do it unless rownum is bounded
--> 695             fits_horizontal = self._repr_fits_horizontal_()
    696
    697         if fits_vertical and fits_horizontal:

/Users/234BroadWalk/pandas/pandas/core/frame.pyc in _repr_fits_horizontal_(self)
    653             d=d.iloc[:min(max_rows, height,len(d))]
    654
--> 655         d.to_string(buf=buf)
    656         value = buf.getvalue()
    657         repr_width = max([len(l) for l in value.split('\n')])

/Users/234BroadWalk/pandas/pandas/core/frame.pyc in to_string(self, buf, columns, col_space, colSpace, header, index, na_rep, formatters, float_format, sparsify, nanRep, index_names, justify, force_unicode, line_width)
   1542                                            header=header, index=index,
   1543                                            line_width=line_width)
-> 1544         formatter.to_string()
   1545
   1546         if buf is None:

/Users/234BroadWalk/pandas/pandas/core/format.pyc in to_string(self, force_unicode)
    292             text = info_line
    293         else:
--> 294             strcols = self._to_str_columns()
    295             if self.line_width is None:
    296                 text = adjoin(1, *strcols)

/Users/234BroadWalk/pandas/pandas/core/format.pyc in _to_str_columns(self)
    245         for i, c in enumerate(self.columns):
    246             if self.header:
--> 247                 fmt_values = self._format_col(i)
    248                 cheader = str_columns[i]
    249

/Users/234BroadWalk/pandas/pandas/core/format.pyc in _format_col(self, i)
    383     def _format_col(self, i):
    384         formatter = self._get_formatter(i)
--> 385         return format_array(self.frame.icol(i).values, formatter,
    386                             float_format=self.float_format,
    387                             na_rep=self.na_rep,

/Users/234BroadWalk/pandas/pandas/core/frame.pyc in icol(self, i)
   1911
   1912     def icol(self, i):
-> 1913         return self._ixs(i,axis=1)
   1914
   1915     def _ixs(self, i, axis=0, copy=False):

/Users/234BroadWalk/pandas/pandas/core/frame.pyc in _ixs(self, i, axis, copy)
   1961                     return self.take(i, axis=1, convert=True)
   1962
-> 1963                 values = self._data.iget(i)
   1964                 return self._col_klass.from_array(values, index=self.index,
   1965                                                   name=label)

/Users/234BroadWalk/pandas/pandas/core/internals.pyc in iget(self, i)
   1649         item = self.items[i]
   1650         if self.items.is_unique:
-> 1651             return self.get(item)
   1652
   1653         # compute the duplicative indexer if needed

/Users/234BroadWalk/pandas/pandas/core/internals.pyc in get(self, item)
   1643
   1644     def get(self, item):
-> 1645         _, block = self._find_block(item)
   1646         return block.get(item)
   1647

TypeError: 'NoneType' object is not iterable

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug; Indexing (Related to indexing on series/frames, not to indexes themselves)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions