Closed
Description
Code Sample, a copy-pastable example if possible
# Your code here
# coding: utf-8
import json, traceback, pandas
test = pandas.DataFrame(data = {
'key': ['a', 'a', 'b', 'b', 'a'],
'value': [1, 2, 3, 4, 5]
})
stat = test.groupby('key').agg(['sum', 'mean', 'count'])
stat_json = stat.to_json()
print(stat)
print(stat_json)
try:
json.loads(stat_json)
except ValueError:
traceback.print_exc()
stat_reset = stat.reset_index()
stat_reset_json = stat_reset.to_json()
print(stat_reset)
print(stat_reset_json)
try:
json.loads(stat_reset_json)
except ValueError:
traceback.print_exc()
Result by Python 2.7
value
sum mean count
key
a 8 2.666667 3
b 7 3.500000 2
{"["value","sum"]":{"a":8,"b":7},"["value","mean"]":{"a":2.6666666667,"b":3.5},"["value","count"]":{"a":3,"b":2}}
Traceback (most recent call last):
File "pycheck.py", line 12, in <module>
json.loads(stat_json)
File "/usr/local/Cellar/python@2/2.7.14_3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.py", line 339, in loads
return _default_decoder.decode(s)
File "/usr/local/Cellar/python@2/2.7.14_3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/local/Cellar/python@2/2.7.14_3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 380, in raw_decode
obj, end = self.scan_once(s, idx)
ValueError: Expecting : delimiter: line 1 column 5 (char 4)
key value
sum mean count
0 a 8 2.666667 3
1 b 7 3.500000 2
{"["key",""]":{"0":"a","1":"b"},"["value","sum"]":{"0":8,"1":7},"["value","mean"]":{"0":2.6666666667,"1":3.5},"["value","count"]":{"0":3,"1":2}}
Traceback (most recent call last):
File "pycheck.py", line 20, in <module>
json.loads(stat_reset_json)
File "/usr/local/Cellar/python@2/2.7.14_3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.py", line 339, in loads
return _default_decoder.decode(s)
File "/usr/local/Cellar/python@2/2.7.14_3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/local/Cellar/python@2/2.7.14_3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 380, in raw_decode
obj, end = self.scan_once(s, idx)
ValueError: Expecting : delimiter: line 1 column 5 (char 4)
Result by Python 3
value
sum mean count
key
a 8 2.666667 3
b 7 3.500000 2
{"["value","sum"]":{"a":8,"b":7},"["value","mean"]":{"a":2.6666666667,"b":3.5},"["value","count"]":{"a":3,"b":2}}
Traceback (most recent call last):
File "pycheck.py", line 12, in <module>
json.loads(stat_json)
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/json/__init__.py", line 354, in loads
return _default_decoder.decode(s)
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/json/decoder.py", line 339, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/json/decoder.py", line 355, in raw_decode
obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ':' delimiter: line 1 column 5 (char 4)
key value
sum mean count
0 a 8 2.666667 3
1 b 7 3.500000 2
{"["key",""]":{"0":"a","1":"b"},"["value","sum"]":{"0":8,"1":7},"["value","mean"]":{"0":2.6666666667,"1":3.5},"["value","count"]":{"0":3,"1":2}}
Traceback (most recent call last):
File "pycheck.py", line 20, in <module>
json.loads(stat_reset_json)
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/json/__init__.py", line 354, in loads
return _default_decoder.decode(s)
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/json/decoder.py", line 339, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/json/decoder.py", line 355, in raw_decode
obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ':' delimiter: line 1 column 5 (char 4)
Problem description
DataFrame.to_json()
returns malformed JSON when its column contains tuple object, such as ('value', 'sum')
, ('value', 'mean')
etc. in this case.
Expected Output
I think, at least in this case, correct output of double quote in strings should be escaped like this:
{"[\"value\",\"sum\"]":{"a":8,"b":7},"[\"value\",\"mean\"]":{"a":2.6666666667,"b":3.5},"[\"value\",\"count\"]":{"a":3,"b":2}}
{"[\"key\",\"\"]":{"0":"a","1":"b"},"[\"value\",\"sum\"]":{"0":8,"1":7},"[\"value\",\"mean\"]":{"0":2.6666666667,"1":3.5},"[\"value\",\"count\"]":{"0":3,"1":2}}
Output of pd.show_versions()
Python2.7
INSTALLED VERSIONS
------------------
commit: None
python: 2.7.14.final.0
python-bits: 64
OS: Darwin
OS-release: 16.7.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: None.None
pandas: 0.22.0
pytest: None
pip: 9.0.1
setuptools: 38.5.2
Cython: None
numpy: 1.14.1
scipy: None
pyarrow: None
xarray: None
IPython: None
sphinx: 1.6.3
patsy: None
dateutil: 2.7.0
pytz: 2017.2
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: None
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: 4.5.3
html5lib: None
sqlalchemy: 1.2.5
pymysql: None
psycopg2: None
jinja2: 2.9.6
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None
Python3
INSTALLED VERSIONS
------------------
commit: None
python: 3.6.4.final.0
python-bits: 64
OS: Darwin
OS-release: 16.7.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8
pandas: 0.22.0
pytest: None
pip: 9.0.1
setuptools: 38.5.2
Cython: None
numpy: 1.14.1
scipy: None
pyarrow: None
xarray: None
IPython: None
sphinx: None
patsy: None
dateutil: 2.7.0
pytz: 2018.3
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: None
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: None
sqlalchemy: 1.2.5
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None
I have checked past issues briefly, but I'm sorry if same issue was already filed.