Skip to content

Issue with index_col and read_html #5066

Closed
@cancan101

Description

@cancan101
pd.read_html("http://www.camacau.com/changeLang?lang=en_US&url=/statistic_list",infer_types=False,header=0,index_col=0)

yields:

Traceback (most recent call last)
<ipython-input-114-a13f8ac8a77b> in <module>()
----> 1 foo2 = pd.read_html("http://www.camacau.com/changeLang?lang=en_US&url=/statistic_list",infer_types=False,header=0,index_col=0)

/usr/local/lib/python2.7/dist-packages/pandas/io/html.pyc in read_html(io, match, flavor, header, index_col, skiprows, infer_types, attrs)
    904                              'data (you passed a negative value)')
    905     return _parse(flavor, io, match, header, index_col, skiprows, infer_types,
--> 906                   attrs)

/usr/local/lib/python2.7/dist-packages/pandas/io/html.pyc in _parse(flavor, io, match, header, index_col, skiprows, infer_types, attrs)
    776 
    777     return [_data_to_frame(table, header, index_col, infer_types, skiprows)
--> 778             for table in tables]
    779 
    780 

/usr/local/lib/python2.7/dist-packages/pandas/io/html.pyc in _data_to_frame(data, header, index_col, infer_types, skiprows)
    674 
    675         # drop by default
--> 676         df.set_index(cols, inplace=True)
    677         if df.index.nlevels == 1:
    678             if isnull(df.index.name) or not df.index.name:

/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in set_index(self, keys, drop, append, inplace, verify_integrity)
   2833             arrays.append(level)
   2834 
-> 2835         index = MultiIndex.from_arrays(arrays, names=names)
   2836 
   2837         if verify_integrity and not index.is_unique:

/usr/local/lib/python2.7/dist-packages/pandas/core/index.pyc in from_arrays(cls, arrays, sortorder, names)
   1763         if len(arrays) == 1:
   1764             name = None if names is None else names[0]
-> 1765             return Index(arrays[0], name=name)
   1766 
   1767         cats = [Categorical.from_array(arr) for arr in arrays]

/usr/local/lib/python2.7/dist-packages/pandas/core/index.pyc in __new__(cls, data, dtype, copy, name, **kwargs)
    108                 return Int64Index(data, copy=copy, dtype=dtype, name=name)
    109 
--> 110             subarr = com._asarray_tuplesafe(data, dtype=object)
    111         elif np.isscalar(data):
    112             raise ValueError('Index(...) must be called with a collection '

/usr/local/lib/python2.7/dist-packages/pandas/core/common.pyc in _asarray_tuplesafe(values, dtype)
   1489             # in numpy, leading to the following
   1490             result = np.empty(len(values), dtype=object)
-> 1491             result[:] = values
   1492 
   1493     return result

ValueError: could not broadcast input array from shape (11,2) into shape (11)

Metadata

Metadata

Assignees

No one assigned

    Labels

    IO Data: IO issues that don't fit into a more specific label; IO HTML: read_html, to_html, Styler.apply, Styler.applymap

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions