Closed
Description
First, an example that works as expected (non-categorical):
In [22]: pd.__version__
Out[22]: '0.16.2'
In [23]: data = pd.DataFrame({'x': np.arange(99),
'y': np.arange(99) // 50,
'z': np.arange(99) % 3})
In [24]: data.pivot_table('x', 'y', 'z')
Out[24]:
z 0 1 2
y
0 24.0 25.0 24.5
1 73.5 74.5 74.0
In [25]: data.pivot_table('x', 'y', 'z', margins=True)
Out[25]:
z 0 1 2 All
y
0 24.0 25.0 24.5 24.5
1 73.5 74.5 74.0 74.0
All 48.0 49.0 50.0 49.0
Now convert `y` and `z` to categories; the pivot table still works without margins but fails with `margins=True`:
In [27]: data.y = data.y.astype('category')
In [28]: data.z = data.z.astype('category')
In [29]: data.pivot_table('x', 'y', 'z')
Out[29]:
z 0 1 2
y
0 24.0 25.0 24.5
1 73.5 74.5 74.0
In [32]: data.pivot_table('x', 'y', 'z', margins=True)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/internals.py in set(self, item, value, check)
2979 try:
-> 2980 loc = self.items.get_loc(item)
2981 except KeyError:
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/index.py in get_loc(self, key, method)
5072 key = tuple(map(_maybe_str_to_time_stamp, key, self.levels))
-> 5073 return self._engine.get_loc(key)
5074
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3824)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3704)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12280)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12231)()
KeyError: ('x', 'All')
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-32-7436e0e1c9bb> in <module>()
----> 1 data.pivot_table('x', 'y', 'z', margins=True)
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/tools/pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna)
141 if margins:
142 table = _add_margins(table, data, values, rows=index,
--> 143 cols=columns, aggfunc=aggfunc)
144
145 # discard the top level
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/tools/pivot.py in _add_margins(table, data, values, rows, cols, aggfunc)
167
168 if values:
--> 169 marginal_result_set = _generate_marginal_results(table, data, values, rows, cols, aggfunc, grand_margin)
170 if not isinstance(marginal_result_set, tuple):
171 return marginal_result_set
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/tools/pivot.py in _generate_marginal_results(table, data, values, rows, cols, aggfunc, grand_margin)
236 # we are going to mutate this, so need to copy!
237 piece = piece.copy()
--> 238 piece[all_key] = margin[key]
239
240 table_pieces.append(piece)
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
2125 else:
2126 # set column
-> 2127 self._set_item(key, value)
2128
2129 def _setitem_slice(self, key, value):
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/frame.py in _set_item(self, key, value)
2203 self._ensure_valid_index(value)
2204 value = self._sanitize_column(key, value)
-> 2205 NDFrame._set_item(self, key, value)
2206
2207 # check if we are modifying a copy
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/generic.py in _set_item(self, key, value)
1194
1195 def _set_item(self, key, value):
-> 1196 self._data.set(key, value)
1197 self._clear_item_cache()
1198
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/internals.py in set(self, item, value, check)
2981 except KeyError:
2982 # This item wasn't present, just insert at end
-> 2983 self.insert(len(self.items), item, value)
2984 return
2985
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/internals.py in insert(self, loc, item, value, allow_duplicates)
3100 self._blknos = np.insert(self._blknos, loc, len(self.blocks))
3101
-> 3102 self.axes[0] = self.items.insert(loc, item)
3103
3104 self.blocks += (block,)
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/index.py in insert(self, loc, item)
5583 # other labels
5584 lev_loc = len(level)
-> 5585 level = level.insert(lev_loc, k)
5586 else:
5587 lev_loc = level.get_loc(k)
/Users/jakevdp/anaconda/envs/py3k/lib/python3.3/site-packages/pandas/core/index.py in insert(self, loc, item)
3217 code = self.categories.get_indexer([item])
3218 if (code == -1):
-> 3219 raise TypeError("cannot insert an item into a CategoricalIndex that is not already an existing category")
3220
3221 codes = self.codes
TypeError: cannot insert an item into a CategoricalIndex that is not already an existing category