Skip to content

ENH: write Table meta-data (non_index_axes) as a CArray (rather than as meta-data) #6245

Open
@dragoljub

Description

@dragoljub

@jreback

I get this very strange error when writing very wide HDF5 tables. In this case, a random float32 array with 1500 columns can't be written as an HDF5 table. However, if I rename the columns, it seems to write fine...

Not sure what's going on.

-Gagi

In [1]: %paste
import numpy as np
import pandas as pd

print np.__version__
print pd.__version__

pd.options.display.large_repr = 'info'

df = pd.DataFrame(np.random.randn(1000,1500), dtype=np.float32)
#df.columns = df.columns.map(lambda x: 'A'+str(x))
df.to_hdf(r'Y:\table.h5', 'df', format='table', nan_rep='')

## -- End pasted text --
1.7.1
0.13.0
---------------------------------------------------------------------------
HDF5ExtError                              Traceback (most recent call last)
<ipython-input-1-ae7bcadf7834> in <module>()
      9 df = pd.DataFrame(np.random.randn(1000,1500), dtype=np.float32)
     10 #df.columns = df.columns.map(lambda x: 'A'+str(x))
---> 11 df.to_hdf(r'Y:\table.h5', 'df', format='table', nan_rep='')

D:\Python27\lib\site-packages\pandas\core\generic.pyc in to_hdf(self, path_or_buf, key, **kwargs)
    860
    861         from pandas.io import pytables
--> 862         return pytables.to_hdf(path_or_buf, key, self, **kwargs)
    863
    864     def to_msgpack(self, path_or_buf=None, **kwargs):

D:\Python27\lib\site-packages\pandas\io\pytables.pyc in to_hdf(path_or_buf, key, value, mode, complevel, complib, append, **kwargs)
    269         with get_store(path_or_buf, mode=mode, complevel=complevel,
    270                        complib=complib) as store:
--> 271             f(store)
    272     else:
    273         f(path_or_buf)

D:\Python27\lib\site-packages\pandas\io\pytables.pyc in <lambda>(store)
    264         f = lambda store: store.append(key, value, **kwargs)
    265     else:
--> 266         f = lambda store: store.put(key, value, **kwargs)
    267
    268     if isinstance(path_or_buf, compat.string_types):

D:\Python27\lib\site-packages\pandas\io\pytables.pyc in put(self, key, value, format, append, **kwargs)
    792             format = get_option("io.hdf.default_format") or 'fixed'
    793         kwargs = self._validate_format(format, kwargs)
--> 794         self._write_to_group(key, value, append=append, **kwargs)
    795
    796     def remove(self, key, where=None, start=None, stop=None):

D:\Python27\lib\site-packages\pandas\io\pytables.pyc in _write_to_group(self, key, value, format, index, append, complib, encoding, **kwargs)
   1244
   1245         # write the object
-> 1246         s.write(obj=value, append=append, complib=complib, **kwargs)
   1247
   1248         if s.is_table and index:

D:\Python27\lib\site-packages\pandas\io\pytables.pyc in write(self, obj, axes, append, complib, complevel, fletcher32, min_itemsize, chunksize, expectedrows, dropna, **kwargs)
   3548
   3549             # set the table attributes
-> 3550             self.set_attrs()
   3551
   3552             # create the table

D:\Python27\lib\site-packages\pandas\io\pytables.pyc in set_attrs(self)
   2858         self.attrs.index_cols = self.index_cols()
   2859         self.attrs.values_cols = self.values_cols()
-> 2860         self.attrs.non_index_axes = self.non_index_axes
   2861         self.attrs.data_columns = self.data_columns
   2862         self.attrs.nan_rep = self.nan_rep

D:\Python27\lib\site-packages\tables\attributeset.pyc in __setattr__(self, name, value)
    450
    451         # Set the attribute.
--> 452         self._g__setattr(name, value)
    453
    454         # Log new attribute addition.

D:\Python27\lib\site-packages\tables\attributeset.pyc in _g__setattr(self, name, value)
    393             value = stvalue[()]
    394
--> 395         self._g_setattr(self._v_node, name, stvalue)
    396
    397         # New attribute or value. Introduce it into the local

D:\Python27\lib\site-packages\tables\hdf5extension.pyd in tables.hdf5extension.AttributeSet._g_setattr (tables\hdf5extension.c:6334)()

HDF5ExtError: HDF5 error back trace

  File "..\..\hdf5-1.8.11\src\H5A.c", line 254, in H5Acreate2
    unable to create attribute
  File "..\..\hdf5-1.8.11\src\H5A.c", line 503, in H5A_create
    unable to create attribute in object header
  File "..\..\hdf5-1.8.11\src\H5Oattribute.c", line 347, in H5O_attr_create
    unable to create new attribute in header
  File "..\..\hdf5-1.8.11\src\H5Omessage.c", line 224, in H5O_msg_append_real
    unable to create new message
  File "..\..\hdf5-1.8.11\src\H5Omessage.c", line 1945, in H5O_msg_alloc
    unable to allocate space for message
  File "..\..\hdf5-1.8.11\src\H5Oalloc.c", line 1142, in H5O_alloc
    object header message is too large

End of HDF5 error back trace

Can't set attribute 'non_index_axes' in node:
 /df (Group) u''.

In [2]: df.columns = df.columns.map(lambda x: 'A'+str(x))

In [3]: df.to_hdf(r'Y:\tabl2e3.h5', 'df', format='table', nan_rep='')

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions