Skip to content

BUG: pivot_table with margins=True fails for categorical dtype #10989

Closed
@jakevdp

Description

@jakevdp

First, an example that works as expected (non-categorical):

In [22]: pd.__version__
Out[22]: '0.16.2'

In [23]: data = pd.DataFrame({'x': np.arange(99),
                     'y': np.arange(99) // 50,
                     'z': np.arange(99) % 3})

In [24]: data.pivot_table('x', 'y', 'z')
Out[24]: 
z     0     1     2
y                  
0  24.0  25.0  24.5
1  73.5  74.5  74.0

In [25]: data.pivot_table('x', 'y', 'z', margins=True)
Out[25]: 
z       0     1     2   All
y                          
0    24.0  25.0  24.5  24.5
1    73.5  74.5  74.0  74.0
All  48.0  49.0  50.0  49.0

Now convert y and z to categories; pivot_table works without margins but fails with margins=True:

In [27]: data.y = data.y.astype('category')

In [28]: data.z = data.z.astype('category')

In [29]: data.pivot_table('x', 'y', 'z')
Out[29]: 
z     0     1     2
y                  
0  24.0  25.0  24.5
1  73.5  74.5  74.0

In [32]: data.pivot_table('x', 'y', 'z', margins=True)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/internals.py in set(self, item, value, check)
   2979         try:
-> 2980             loc = self.items.get_loc(item)
   2981         except KeyError:

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/index.py in get_loc(self, key, method)
   5072             key = tuple(map(_maybe_str_to_time_stamp, key, self.levels))
-> 5073             return self._engine.get_loc(key)
   5074 

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3824)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3704)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12280)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12231)()

KeyError: ('x', 'All')

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-32-7436e0e1c9bb> in <module>()
----> 1 data.pivot_table('x', 'y', 'z', margins=True)

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/tools/pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna)
    141     if margins:
    142         table = _add_margins(table, data, values, rows=index,
--> 143                              cols=columns, aggfunc=aggfunc)
    144 
    145     # discard the top level

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/tools/pivot.py in _add_margins(table, data, values, rows, cols, aggfunc)
    167 
    168     if values:
--> 169         marginal_result_set = _generate_marginal_results(table, data, values, rows, cols, aggfunc, grand_margin)
    170         if not isinstance(marginal_result_set, tuple):
    171             return marginal_result_set

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/tools/pivot.py in _generate_marginal_results(table, data, values, rows, cols, aggfunc, grand_margin)
    236                 # we are going to mutate this, so need to copy!
    237                 piece = piece.copy()
--> 238                 piece[all_key] = margin[key]
    239 
    240                 table_pieces.append(piece)

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
   2125         else:
   2126             # set column
-> 2127             self._set_item(key, value)
   2128 
   2129     def _setitem_slice(self, key, value):

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/frame.py in _set_item(self, key, value)
   2203         self._ensure_valid_index(value)
   2204         value = self._sanitize_column(key, value)
-> 2205         NDFrame._set_item(self, key, value)
   2206 
   2207         # check if we are modifying a copy

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/generic.py in _set_item(self, key, value)
   1194 
   1195     def _set_item(self, key, value):
-> 1196         self._data.set(key, value)
   1197         self._clear_item_cache()
   1198 

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/internals.py in set(self, item, value, check)
   2981         except KeyError:
   2982             # This item wasn't present, just insert at end
-> 2983             self.insert(len(self.items), item, value)
   2984             return
   2985 

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/internals.py in insert(self, loc, item, value, allow_duplicates)
   3100             self._blknos = np.insert(self._blknos, loc, len(self.blocks))
   3101 
-> 3102         self.axes[0] = self.items.insert(loc, item)
   3103 
   3104         self.blocks += (block,)

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/index.py in insert(self, loc, item)
   5583                 # other labels
   5584                 lev_loc = len(level)
-> 5585                 level = level.insert(lev_loc, k)
   5586             else:
   5587                 lev_loc = level.get_loc(k)

/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/index.py in insert(self, loc, item)
   3217         code = self.categories.get_indexer([item])
   3218         if (code == -1):
-> 3219             raise TypeError("cannot insert an item into a CategoricalIndex that is not already an existing category")
   3220 
   3221         codes = self.codes

TypeError: cannot insert an item into a CategoricalIndex that is not already an existing category

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug; Categorical (Categorical Data Type); Reshaping (Concat, Merge/Join, Stack/Unstack, Explode)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions