Skip to content

Commit 41a1aa0

Browse files
author
matthiashuschle
committed
BUG: to_json - prevent various segfault conditions (GH14256)
1 parent 9f0ee53 commit 41a1aa0

File tree

5 files changed

+54
-6
lines changed

5 files changed

+54
-6
lines changed

doc/source/whatsnew/v0.21.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,7 @@ I/O
866866
- Bug in :meth:`DataFrame.to_html` with ``notebook=True`` where DataFrames with named indices or non-MultiIndex indices had undesired horizontal or vertical alignment for column or row labels, respectively (:issue:`16792`)
867867
- Bug in :meth:`DataFrame.to_html` in which there was no validation of the ``justify`` parameter (:issue:`17527`)
868868
- Bug in :func:`HDFStore.select` when reading a contiguous mixed-data table featuring VLArray (:issue:`17021`)
869+
- Bug in :func:`to_json` where several conditions (including objects with unprintable symbols, objects with deep recursion, and overlong labels) caused segfaults instead of raising the appropriate exception (:issue:`14256`)
869870

870871
Plotting
871872
^^^^^^^^
@@ -940,3 +941,4 @@ Other
940941
^^^^^
941942
- Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`)
942943
- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)
944+

pandas/_libs/src/ujson/lib/ultrajson.h

+7
Original file line numberDiff line numberDiff line change
@@ -307,4 +307,11 @@ EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec,
307307
const char *buffer, size_t cbBuffer);
308308
EXPORTFUNCTION void encode(JSOBJ, JSONObjectEncoder *, const char *, size_t);
309309

310+
#define Buffer_Reserve(__enc, __len) \
311+
if ((size_t)((__enc)->end - (__enc)->offset) < (size_t)(__len)) { \
312+
Buffer_Realloc((__enc), (__len)); \
313+
}
314+
315+
void Buffer_Realloc(JSONObjectEncoder *enc, size_t cbNeeded);
316+
310317
#endif // PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_

pandas/_libs/src/ujson/lib/ultrajsonenc.c

+2-5
Original file line numberDiff line numberDiff line change
@@ -714,11 +714,6 @@ int Buffer_EscapeStringValidated(JSOBJ obj, JSONObjectEncoder *enc,
714714
}
715715
}
716716

717-
#define Buffer_Reserve(__enc, __len) \
718-
if ((size_t)((__enc)->end - (__enc)->offset) < (size_t)(__len)) { \
719-
Buffer_Realloc((__enc), (__len)); \
720-
}
721-
722717
#define Buffer_AppendCharUnchecked(__enc, __chr) *((__enc)->offset++) = __chr;
723718

724719
FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin,
@@ -976,6 +971,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
976971
}
977972

978973
enc->iterEnd(obj, &tc);
974+
Buffer_Reserve(enc, 2);
979975
Buffer_AppendCharUnchecked(enc, ']');
980976
break;
981977
}
@@ -1003,6 +999,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
1003999
}
10041000

10051001
enc->iterEnd(obj, &tc);
1002+
Buffer_Reserve(enc, 2);
10061003
Buffer_AppendCharUnchecked(enc, '}');
10071004
break;
10081005
}

pandas/_libs/src/ujson/python/objToJSON.c

+6-1
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,7 @@ static void NpyArr_getLabel(JSOBJ obj, JSONTypeContext *tc, size_t *outLen,
783783
JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder;
784784
PRINTMARK();
785785
*outLen = strlen(labels[idx]);
786+
Buffer_Reserve(enc, *outLen);
786787
memcpy(enc->offset, labels[idx], sizeof(char) * (*outLen));
787788
enc->offset += *outLen;
788789
*outLen = 0;
@@ -879,7 +880,7 @@ int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) {
879880
NpyArrContext *npyarr;
880881
PRINTMARK();
881882

882-
if (PyErr_Occurred()) {
883+
if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) {
883884
return 0;
884885
}
885886

@@ -1224,6 +1225,10 @@ int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) {
12241225
PyObject *attrName;
12251226
char *attrStr;
12261227

1228+
if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) {
1229+
return 0;
1230+
}
1231+
12271232
if (itemValue) {
12281233
Py_DECREF(GET_TC(tc)->itemValue);
12291234
GET_TC(tc)->itemValue = itemValue = NULL;

pandas/tests/io/json/test_pandas.py

+37
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,43 @@ def test_blocks_compat_GH9037(self):
511511
by_blocks=True,
512512
check_exact=True)
513513

514+
def test_frame_nonprintable_bytes(self):
515+
# GH14256: a failing column caused a segfault if it was not the last one
516+
517+
class BinaryThing(object):
518+
519+
def __init__(self, hexed):
520+
self.hexed = hexed
521+
if compat.PY2:
522+
self.binary = hexed.decode('hex')
523+
else:
524+
self.binary = bytes.fromhex(hexed)
525+
526+
def __str__(self):
527+
return self.hexed
528+
529+
hexed = '574b4454ba8c5eb4f98a8f45'
530+
exc_type = OverflowError
531+
binthing = BinaryThing(hexed)
532+
df_printable = DataFrame({'A': [binthing.hexed]})
533+
assert df_printable.to_json() == '{"A":{"0":"%s"}}' % hexed
534+
df_nonprintable = DataFrame({'A': [binthing]})
535+
pytest.raises(exc_type, df_nonprintable.to_json)
536+
df_mixed = DataFrame({'A': [binthing], 'B': [1]},
537+
columns=['A', 'B'])
538+
pytest.raises(exc_type, df_mixed.to_json)
539+
# default_handler should resolve exceptions for non-string types
540+
assert df_nonprintable.to_json(default_handler=str) == \
541+
'{"A":{"0":"%s"}}' % hexed
542+
assert df_mixed.to_json(default_handler=str) == \
543+
'{"A":{"0":"%s"},"B":{"0":1}}' % hexed
544+
545+
def test_label_overflow(self):
546+
# GH14256: buffer length not checked when writing label
547+
df = pd.DataFrame({'foo': [1337], 'bar' * 100000: [1]})
548+
assert df.to_json() == \
549+
'{"%s":{"0":1},"foo":{"0":1337}}' % ('bar' * 100000)
550+
514551
def test_series_non_unique_index(self):
515552
s = Series(['a', 'b'], index=[1, 1])
516553

0 commit comments

Comments
 (0)