Skip to content

Cannot serialize objects with IntervalIndex using msgpack #19967

Closed
@Liam3851

Description

@Liam3851

Code Sample, a copy-pastable example if possible

From

import numpy as np
import pandas as pd
ser = pd.Series(np.arange(10), pd.interval_range(0, periods=10))
ser.to_msgpack('C:/temp/blah.mpk')

-->
TypeError                                 Traceback (most recent call last)
<ipython-input-8-0da7c2fd5b28> in <module>()
----> 1 pd.Series(np.arange(10), pd.interval_range(0, periods=10)).to_msgpack('C:/temp/blah.mpk')

C:\projects\pandas-dk\pandas\core\generic.py in to_msgpack(self, path_or_buf, encoding, **kwargs)
   1860         from pandas.io import packers
   1861         return packers.to_msgpack(path_or_buf, self, encoding=encoding,
-> 1862                                   **kwargs)
   1863
   1864     def to_sql(self, name, con, schema=None, if_exists='fail', index=True,

C:\projects\pandas-dk\pandas\io\packers.py in to_msgpack(path_or_buf, *args, **kwargs)
    152     if isinstance(path_or_buf, compat.string_types):
    153         with open(path_or_buf, mode) as fh:
--> 154             writer(fh)
    155     elif path_or_buf is None:
    156         buf = compat.BytesIO()

C:\projects\pandas-dk\pandas\io\packers.py in writer(fh)
    147     def writer(fh):
    148         for a in args:
--> 149             fh.write(pack(a, **kwargs))
    150
    151     path_or_buf = _stringify_path(path_or_buf)

C:\projects\pandas-dk\pandas\io\packers.py in pack(o, default, encoding, unicode_errors, use_single_float, autoreset, use_bin_type)
    700                   use_single_float=use_single_float,
    701                   autoreset=autoreset,
--> 702                   use_bin_type=use_bin_type).pack(o)
    703
    704

C:\projects\pandas-dk\pandas\io\msgpack\_packer.pyx in pandas.io.msgpack._packer.Packer.pack()
    229             return ret
    230
--> 231     cpdef pack(self, object obj):
    232         cdef int ret
    233         ret = self._pack(obj, DEFAULT_RECURSE_LIMIT)

C:\projects\pandas-dk\pandas\io\msgpack\_packer.pyx in pandas.io.msgpack._packer.Packer.pack()
    231     cpdef pack(self, object obj):
    232         cdef int ret
--> 233         ret = self._pack(obj, DEFAULT_RECURSE_LIMIT)
    234         if ret == -1:
    235             raise MemoryError

C:\projects\pandas-dk\pandas\io\msgpack\_packer.pyx in pandas.io.msgpack._packer.Packer._pack()
    190                         ret = self._pack(k, nest_limit - 1)
    191                         if ret != 0: break
--> 192                         ret = self._pack(v, nest_limit - 1)
    193                         if ret != 0: break
    194             elif PyDict_Check(o):

C:\projects\pandas-dk\pandas\io\msgpack\_packer.pyx in pandas.io.msgpack._packer.Packer._pack()
    190                         ret = self._pack(k, nest_limit - 1)
    191                         if ret != 0: break
--> 192                         ret = self._pack(v, nest_limit - 1)
    193                         if ret != 0: break
    194             elif PyDict_Check(o):

C:\projects\pandas-dk\pandas\io\msgpack\_packer.pyx in pandas.io.msgpack._packer.Packer._pack()
    219                 if ret == 0:
    220                     for v in o:
--> 221                         ret = self._pack(v, nest_limit - 1)
    222                         if ret != 0: break
    223             elif not default_used and self._default:

C:\projects\pandas-dk\pandas\io\msgpack\_packer.pyx in pandas.io.msgpack._packer.Packer._pack()
    226                 continue
    227             else:
--> 228                 raise TypeError("can't serialize {thing!r}".format(thing=o))
    229             return ret
    230

TypeError: can't serialize Interval(0, 1, closed='right')

Problem description

Msgpack currently cannot serialize objects that have an IntervalIndex. I am not sure whether this is related in any way to #13463; there, the issue was just the serialization of a "Period", whereas PeriodIndex works. Here, when trying to serialize an IntervalIndex, it appears that it is actually trying to serialize the individual Interval objects.

Expected Output

Successful serialization without error.

Output of pd.show_versions()

INSTALLED VERSIONS

commit: e3b87c1
python: 3.6.4.final.0
python-bits: 64
OS: Windows
OS-release: 7
machine: AMD64
processor: Intel64 Family 6 Model 62 Stepping 4, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None

pandas: 0.23.0.dev0+422.ge3b87c1
pytest: 3.3.2
pip: 9.0.1
setuptools: 38.4.0
Cython: 0.27.3
numpy: 1.14.0
scipy: 1.0.0
pyarrow: 0.8.0
xarray: 0.10.0
IPython: 6.2.1
sphinx: 1.6.6
patsy: 0.5.0
dateutil: 2.6.1
pytz: 2017.3
blosc: None
bottleneck: 1.2.1
tables: 3.4.2
numexpr: 2.6.4
feather: 0.4.0
matplotlib: 2.1.2
openpyxl: 2.4.10
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: 1.0.2
lxml: 4.1.1
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.1
pymysql: 0.7.11.None
psycopg2: None
jinja2: 2.10
s3fs: 0.1.2
fastparquet: 0.1.4
pandas_gbq: None
pandas_datareader: None

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions