Closed
Description
The following example works with a MultiIndex but fails with a simple (single-level) index, which is inconsistent and unintuitive behaviour:
# Create the example: a six-row frame indexed by ("V1", "W"), written out
# as a tab-separated file for the chunked reads below.
col_nums = [0]
df = pd.DataFrame(
    {
        "V1": ["a", "b", "c", "d", "e", "aaaah!!!"],
        "W": ["c", "d", "c", "d", "c", "c"],
        "data": np.arange(6),
    }
).set_index(["V1", "W"])
df.to_csv("testtable.tab", sep="\t")
# MulitIndex -> works:
sep = "\t"
indexcols =[0,1]
chunksize=5
xbed = "testtable.tab"
%rm 'tempstore.h5'
# create a store
with pd.HDFStore('tempstore.h5') as store:
for nn, chunk in enumerate(pd.read_table(xbed, chunksize=chunksize, sep = sep, index_col= indexcols)):
group = "x"
print(chunk.index.names)
store.append(group, chunk, format = "table",
min_itemsize=dict(zip(chunk.index.names, [32]*len(chunk.index.names))))
print("chunk #" , nn, file = sys.stderr)
# simple Index -> fails:
sep = "\t"
indexcols =[0] # <==== this is the only difference
chunksize=5
xbed = "testtable.tab"
%rm 'tempstore.h5'
# create a store
with pd.HDFStore('tempstore.h5') as store:
for nn, chunk in enumerate(pd.read_table(xbed, chunksize=chunksize, sep = sep, index_col= indexcols)):
group = "x"
print(chunk.index.names)
store.append(group, chunk, format = "table",
min_itemsize=dict(zip(chunk.index.names, [32]*len(chunk.index.names))))
print("chunk #" , nn, file = sys.stderr)
-> ... ValueError: min_itemsize has the key [V1] which is not an axis or data_column