Skip to content

Fix memory leak with ujson module #49466

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ I/O
- Improved error message in :func:`read_excel` by including the offending sheet name when an exception is raised while reading a file (:issue:`48706`)
- Bug where pickling a subset of PyArrow-backed data would serialize the entire data instead of the subset (:issue:`42600`)
- Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`)
-
- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)

Period
^^^^^^
Expand Down
73 changes: 19 additions & 54 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,18 @@ Numeric decoder derived from TCL library
#include "date_conversions.h"
#include "datetime.h"

static PyTypeObject *type_decimal;
static PyTypeObject *cls_dataframe;
static PyTypeObject *cls_series;
static PyTypeObject *cls_index;
static PyTypeObject *cls_nat;
static PyTypeObject *cls_na;
PyObject *cls_timedelta;

npy_int64 get_nat(void) { return NPY_MIN_INT64; }

typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
size_t *_outLen);

int object_is_decimal_type(PyObject *obj);
int object_is_dataframe_type(PyObject *obj);
int object_is_series_type(PyObject *obj);
int object_is_index_type(PyObject *obj);
int object_is_nat_type(PyObject *obj);
int object_is_na_type(PyObject *obj);

typedef struct __NpyArrContext {
PyObject *array;
char *dataptr;
Expand Down Expand Up @@ -146,44 +145,6 @@ enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES };

int PdBlock_iterNext(JSOBJ, JSONTypeContext *);

// One-time module initialization: cache frequently-checked pandas/stdlib
// type objects in file-level statics so hot-path type checks avoid repeated
// attribute lookups.
//
// NOTE(review): every PyObject_GetAttrString result is a strong reference
// stored in a static and never released — these live for the lifetime of
// the process. That is the interpreter-level "leak" tracked in GH 49222.
//
// Returns NULL unconditionally (GH 31463); callers ignore the result.
void *initObjToJSON(void) {
    PyObject *mod_pandas;
    PyObject *mod_nattype;
    PyObject *mod_natype;
    PyObject *mod_decimal = PyImport_ImportModule("decimal");
    // Guard against a failed import: PyObject_GetAttrString(NULL, ...) is
    // undefined behavior in the CPython C API. Mirrors the checks done for
    // the pandas imports below.
    if (mod_decimal) {
        type_decimal =
            (PyTypeObject *)PyObject_GetAttrString(mod_decimal, "Decimal");
        Py_DECREF(mod_decimal);
    }

    // Initialize the datetime C API table (required before any
    // PyDateTime_* / PyDate_* macro is used in this translation unit).
    PyDateTime_IMPORT;

    mod_pandas = PyImport_ImportModule("pandas");
    if (mod_pandas) {
        cls_dataframe =
            (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "DataFrame");
        cls_index = (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Index");
        cls_series =
            (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Series");
        Py_DECREF(mod_pandas);
    }

    mod_nattype = PyImport_ImportModule("pandas._libs.tslibs.nattype");
    if (mod_nattype) {
        cls_nat =
            (PyTypeObject *)PyObject_GetAttrString(mod_nattype, "NaTType");
        Py_DECREF(mod_nattype);
    }

    mod_natype = PyImport_ImportModule("pandas._libs.missing");
    if (mod_natype) {
        cls_na = (PyTypeObject *)PyObject_GetAttrString(mod_natype, "NAType");
        Py_DECREF(mod_natype);
    }

    // GH 31463
    return NULL;
}

static TypeContext *createTypeContext(void) {
TypeContext *pc;

Expand Down Expand Up @@ -216,8 +177,7 @@ static TypeContext *createTypeContext(void) {
static PyObject *get_values(PyObject *obj) {
PyObject *values = NULL;

if (PyObject_TypeCheck(obj, cls_index) ||
PyObject_TypeCheck(obj, cls_series)) {
if (object_is_index_type(obj) || object_is_series_type(obj)) {
// The special cases to worry about are dt64tz and category[dt64tz].
// In both cases we want the UTC-localized datetime64 ndarray,
// without going through and object array of Timestamps.
Expand Down Expand Up @@ -1510,12 +1470,12 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
pc->PyTypeToUTF8 = PyUnicodeToUTF8;
tc->type = JT_UTF8;
return;
} else if (PyObject_TypeCheck(obj, type_decimal)) {
} else if (object_is_decimal_type(obj)) {
GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj);
tc->type = JT_DOUBLE;
return;
} else if (PyDateTime_Check(obj) || PyDate_Check(obj)) {
if (PyObject_TypeCheck(obj, cls_nat)) {
if (object_is_nat_type(obj)) {
tc->type = JT_NULL;
return;
}
Expand Down Expand Up @@ -1606,14 +1566,14 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
"%R (0d array) is not JSON serializable at the moment",
obj);
goto INVALID;
} else if (PyObject_TypeCheck(obj, cls_na)) {
} else if (object_is_na_type(obj)) {
tc->type = JT_NULL;
return;
}

ISITERABLE:

if (PyObject_TypeCheck(obj, cls_index)) {
if (object_is_index_type(obj)) {
if (enc->outputFormat == SPLIT) {
tc->type = JT_OBJECT;
pc->iterBegin = Index_iterBegin;
Expand All @@ -1637,7 +1597,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
}

return;
} else if (PyObject_TypeCheck(obj, cls_series)) {
} else if (object_is_series_type(obj)) {
if (enc->outputFormat == SPLIT) {
tc->type = JT_OBJECT;
pc->iterBegin = Series_iterBegin;
Expand Down Expand Up @@ -1701,7 +1661,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
pc->iterGetValue = NpyArr_iterGetValue;
pc->iterGetName = NpyArr_iterGetName;
return;
} else if (PyObject_TypeCheck(obj, cls_dataframe)) {
} else if (object_is_dataframe_type(obj)) {
if (enc->blkCtxtPassthru) {
pc->pdblock = enc->blkCtxtPassthru;
tc->type =
Expand Down Expand Up @@ -1969,6 +1929,11 @@ char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) {

PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args,
PyObject *kwargs) {
PyDateTime_IMPORT;
if (PyDateTimeAPI == NULL) {
return NULL;
}

static char *kwlist[] = {"obj",
"ensure_ascii",
"double_precision",
Expand Down
Loading