Closed
Description
This is a strange one... Here's a short repro of the error, which I can trigger in pandas 0.21-0.25 with numpy 1.15-1.17 (I didn't try further back than that):
>>> import pandas as pd
>>> df = pd.DataFrame({'x': pd.date_range('2019', periods=10, tz='UTC')})
>>> df = df.iloc[:, :5]
>>> df._repr_html_()
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-3-f6e53eba0a8f> in <module>()
2 df = pd.DataFrame({'x': pd.date_range('2019', periods=10, tz='UTC')})
3 df = df.iloc[:, :5]
----> 4 df._repr_html_()
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _repr_html_(self)
667 buf = StringIO("")
668 self.info(buf=buf)
--> 669 # need to escape the <class>, should be the first line.
670 val = buf.getvalue().replace("<", r"&lt;", 1)
671 val = val.replace(">", r"&gt;", 1)
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in to_html(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, bold_rows, classes, escape, max_rows, max_cols, show_dimensions, notebook, decimal, border)
1732
1733 >>> df.to_records(column_dtypes={"A": "int32"})
-> 1734 rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)],
1735 dtype=[('I', 'O'), ('A', '<i4'), ('B', '<f8')])
1736
/usr/local/lib/python3.6/dist-packages/pandas/io/formats/format.py in to_html(self, classes, notebook, border)
732 )
733 )
--> 734
735 def _join_multiline(self, *strcols):
736 lwidth = self.line_width
/usr/local/lib/python3.6/dist-packages/pandas/io/formats/format.py in write_result(self, buf)
1206 else:
1207 threshold = None
-> 1208
1209 # if we have a fixed_width, we'll need to try different float_format
1210 def format_values_with(float_format):
/usr/local/lib/python3.6/dist-packages/pandas/io/formats/format.py in _write_body(self, indent)
1369
1370 Examples
-> 1371 --------
1372 Keeps all entries different after rounding:
1373
/usr/local/lib/python3.6/dist-packages/pandas/io/formats/format.py in _write_regular_rows(self, fmt_values, indent)
1403 to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
1404 to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None
-> 1405
1406 # Least precision that keeps percentiles unique after rounding
1407 prec = -np.floor(
/usr/local/lib/python3.6/dist-packages/pandas/io/formats/format.py in <genexpr>(.0)
1403 to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
1404 to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None
-> 1405
1406 # Least precision that keeps percentiles unique after rounding
1407 prec = -np.floor(
IndexError: list index out of range
It seems that somehow, the second argument of iloc is truncating the underlying data along the wrong axis:
>>> df.shape
(10, 1)
>>> df.values.shape
(5, 1)
>>> print(df)
x
0 2019-01-01 00:00:00+00:00
1 2019-01-02 00:00:00+00:00
2 2019-01-03 00:00:00+00:00
3 2019-01-04 00:00:00+00:00
4 2019-01-05 00:00:00+00:00
5
6
7
8
9
This appears to happen only if the data contains timestamps with a timezone specified. If I remove tz='UTC' from the repro above, everything works properly.