@@ -969,7 +969,7 @@ def read(self) -> DataFrame | Series:
969
969
else :
970
970
return obj
971
971
972
- def _get_object_parser (self , json ) -> DataFrame | Series :
972
+ def _get_object_parser (self , json : str ) -> DataFrame | Series :
973
973
"""
974
974
Parses a json document into a pandas object.
975
975
"""
@@ -985,16 +985,14 @@ def _get_object_parser(self, json) -> DataFrame | Series:
985
985
"date_unit" : self .date_unit ,
986
986
"dtype_backend" : self .dtype_backend ,
987
987
}
988
- obj = None
989
988
if typ == "frame" :
990
- obj = FrameParser (json , ** kwargs ).parse ()
991
-
992
- if typ == "series" or obj is None :
989
+ return FrameParser (json , ** kwargs ).parse ()
990
+ elif typ == "series" :
993
991
if not isinstance (dtype , bool ):
994
992
kwargs ["dtype" ] = dtype
995
- obj = SeriesParser (json , ** kwargs ).parse ()
996
-
997
- return obj
993
+ return SeriesParser (json , ** kwargs ).parse ()
994
+ else :
995
+ raise ValueError ( f" { typ = } must be 'frame' or 'series'." )
998
996
999
997
def close (self ) -> None :
1000
998
"""
@@ -1107,7 +1105,6 @@ def __init__(
1107
1105
self .convert_dates = convert_dates
1108
1106
self .date_unit = date_unit
1109
1107
self .keep_default_dates = keep_default_dates
1110
- self .obj : DataFrame | Series | None = None
1111
1108
self .dtype_backend = dtype_backend
1112
1109
1113
1110
@final
@@ -1121,26 +1118,22 @@ def check_keys_split(self, decoded: dict) -> None:
1121
1118
raise ValueError (f"JSON data had unexpected key(s): { bad_keys_joined } " )
1122
1119
1123
1120
@final
1124
- def parse (self ):
1125
- self ._parse ()
1121
+ def parse (self ) -> DataFrame | Series :
1122
+ obj = self ._parse ()
1126
1123
1127
- if self .obj is None :
1128
- return None
1129
1124
if self .convert_axes :
1130
- self ._convert_axes ()
1131
- self ._try_convert_types ()
1132
- return self . obj
1125
+ obj = self ._convert_axes (obj )
1126
+ obj = self ._try_convert_types (obj )
1127
+ return obj
1133
1128
1134
    def _parse(self) -> DataFrame | Series:
        # Abstract hook: subclasses build the raw pandas object from self.json.
        raise AbstractMethodError(self)
1136
1131
1137
1132
@final
1138
- def _convert_axes (self ) -> None :
1133
+ def _convert_axes (self , obj : DataFrame | Series ) -> DataFrame | Series :
1139
1134
"""
1140
1135
Try to convert axes.
1141
1136
"""
1142
- obj = self .obj
1143
- assert obj is not None # for mypy
1144
1137
for axis_name in obj ._AXIS_ORDERS :
1145
1138
ax = obj ._get_axis (axis_name )
1146
1139
ser = Series (ax , dtype = ax .dtype , copy = False )
@@ -1153,9 +1146,10 @@ def _convert_axes(self) -> None:
1153
1146
)
1154
1147
if result :
1155
1148
new_axis = Index (new_ser , dtype = new_ser .dtype , copy = False )
1156
- setattr (self .obj , axis_name , new_axis )
1149
+ setattr (obj , axis_name , new_axis )
1150
+ return obj
1157
1151
1158
    def _try_convert_types(self, obj: DataFrame | Series) -> DataFrame | Series:
        # Abstract hook: subclasses attempt dtype conversion on the parsed obj.
        raise AbstractMethodError(self)
1160
1154
1161
1155
@final
@@ -1182,8 +1176,10 @@ def _try_convert_data(
1182
1176
1183
1177
elif self .dtype is True :
1184
1178
pass
1185
- else :
1186
- # dtype to force
1179
+ elif not _should_convert_dates (
1180
+ convert_dates , self .keep_default_dates , name
1181
+ ):
1182
+ # convert_dates takes precedence over columns listed in dtypes
1187
1183
dtype = (
1188
1184
self .dtype .get (name ) if isinstance (self .dtype , dict ) else self .dtype
1189
1185
)
@@ -1194,8 +1190,8 @@ def _try_convert_data(
1194
1190
return data , False
1195
1191
1196
1192
if convert_dates :
1197
- new_data , result = self ._try_convert_to_date (data )
1198
- if result :
1193
+ new_data = self ._try_convert_to_date (data )
1194
+ if new_data is not data :
1199
1195
return new_data , True
1200
1196
1201
1197
converted = False
@@ -1245,16 +1241,16 @@ def _try_convert_data(
1245
1241
return data , converted
1246
1242
1247
1243
@final
1248
- def _try_convert_to_date (self , data : Series ) -> tuple [ Series , bool ] :
1244
+ def _try_convert_to_date (self , data : Series ) -> Series :
1249
1245
"""
1250
1246
Try to parse a ndarray like into a date column.
1251
1247
1252
1248
Try to coerce object in epoch/iso formats and integer/float in epoch
1253
- formats. Return a boolean if parsing was successful.
1249
+ formats.
1254
1250
"""
1255
1251
# no conversion on empty
1256
1252
if not len (data ):
1257
- return data , False
1253
+ return data
1258
1254
1259
1255
new_data = data
1260
1256
@@ -1265,7 +1261,7 @@ def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]:
1265
1261
try :
1266
1262
new_data = data .astype ("int64" )
1267
1263
except OverflowError :
1268
- return data , False
1264
+ return data
1269
1265
except (TypeError , ValueError ):
1270
1266
pass
1271
1267
@@ -1277,57 +1273,45 @@ def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]:
1277
1273
| (new_data ._values == iNaT )
1278
1274
)
1279
1275
if not in_range .all ():
1280
- return data , False
1276
+ return data
1281
1277
1282
1278
date_units = (self .date_unit ,) if self .date_unit else self ._STAMP_UNITS
1283
1279
for date_unit in date_units :
1284
1280
try :
1285
- new_data = to_datetime (new_data , errors = "raise" , unit = date_unit )
1281
+ return to_datetime (new_data , errors = "raise" , unit = date_unit )
1286
1282
except (ValueError , OverflowError , TypeError ):
1287
1283
continue
1288
- return new_data , True
1289
- return data , False
1284
+ return data
1290
1285
1291
1286
1292
1287
class SeriesParser(Parser):
    _default_orient = "index"
    _split_keys = ("name", "index", "data")

    def _parse(self) -> Series:
        """Deserialize self.json into a Series."""
        data = ujson_loads(self.json, precise_float=self.precise_float)
        if self.orient != "split":
            return Series(data)
        # "split" orient: keys must be strings and must match _split_keys
        decoded = {str(key): value for key, value in data.items()}
        self.check_keys_split(decoded)
        return Series(**decoded)

    def _try_convert_types(self, obj: Series) -> Series:
        """Attempt dtype conversion of the parsed data; ignore the success flag."""
        converted, _ = self._try_convert_data(
            "data", obj, convert_dates=self.convert_dates
        )
        return converted
1315
1304
1316
1305
1317
1306
class FrameParser (Parser ):
1318
1307
_default_orient = "columns"
1319
1308
_split_keys = ("columns" , "index" , "data" )
1320
- obj : DataFrame | None
1321
1309
1322
- def _parse (self ) -> None :
1310
+ def _parse (self ) -> DataFrame :
1323
1311
json = self .json
1324
1312
orient = self .orient
1325
1313
1326
- if orient == "columns" :
1327
- self .obj = DataFrame (
1328
- ujson_loads (json , precise_float = self .precise_float ), dtype = None
1329
- )
1330
- elif orient == "split" :
1314
+ if orient == "split" :
1331
1315
decoded = {
1332
1316
str (k ): v
1333
1317
for k , v in ujson_loads (json , precise_float = self .precise_float ).items ()
@@ -1341,90 +1325,61 @@ def _parse(self) -> None:
1341
1325
orig_names ,
1342
1326
is_potential_multi_index (orig_names , None ),
1343
1327
)
1344
- self . obj = DataFrame (dtype = None , ** decoded )
1328
+ return DataFrame (dtype = None , ** decoded )
1345
1329
elif orient == "index" :
1346
- self . obj = DataFrame .from_dict (
1330
+ return DataFrame .from_dict (
1347
1331
ujson_loads (json , precise_float = self .precise_float ),
1348
1332
dtype = None ,
1349
1333
orient = "index" ,
1350
1334
)
1351
1335
elif orient == "table" :
1352
- self . obj = parse_table_schema (json , precise_float = self .precise_float )
1336
+ return parse_table_schema (json , precise_float = self .precise_float )
1353
1337
else :
1354
- self .obj = DataFrame (
1338
+ # includes orient == "columns"
1339
+ return DataFrame (
1355
1340
ujson_loads (json , precise_float = self .precise_float ), dtype = None
1356
1341
)
1357
1342
1358
- def _process_converter (
1359
- self ,
1360
- f : Callable [[Hashable , Series ], tuple [Series , bool ]],
1361
- filt : Callable [[Hashable ], bool ] | None = None ,
1362
- ) -> None :
1363
- """
1364
- Take a conversion function and possibly recreate the frame.
1365
- """
1366
- if filt is None :
1367
- filt = lambda col : True
1368
-
1369
- obj = self .obj
1370
- assert obj is not None # for mypy
1371
-
1372
- needs_new_obj = False
1373
- new_obj = {}
1374
- for i , (col , c ) in enumerate (obj .items ()):
1375
- if filt (col ):
1376
- new_data , result = f (col , c )
1377
- if result :
1378
- c = new_data
1379
- needs_new_obj = True
1380
- new_obj [i ] = c
1381
-
1382
- if needs_new_obj :
1383
- # possibly handle dup columns
1384
- new_frame = DataFrame (new_obj , index = obj .index )
1385
- new_frame .columns = obj .columns
1386
- self .obj = new_frame
1387
-
1388
- def _try_convert_types (self ) -> None :
1389
- if self .obj is None :
1390
- return
1391
- if self .convert_dates :
1392
- self ._try_convert_dates ()
1393
-
1394
- self ._process_converter (
1395
- lambda col , c : self ._try_convert_data (col , c , convert_dates = False )
1343
+ def _try_convert_types (self , obj : DataFrame ) -> DataFrame :
1344
+ arrays = []
1345
+ for col_label , series in obj .items ():
1346
+ result , _ = self ._try_convert_data (
1347
+ col_label ,
1348
+ series ,
1349
+ convert_dates = _should_convert_dates (
1350
+ self .convert_dates ,
1351
+ keep_default_dates = self .keep_default_dates ,
1352
+ col = col_label ,
1353
+ ),
1354
+ )
1355
+ arrays .append (result .array )
1356
+ return DataFrame ._from_arrays (
1357
+ arrays , obj .columns , obj .index , verify_integrity = False
1396
1358
)
1397
1359
1398
- def _try_convert_dates (self ) -> None :
1399
- if self .obj is None :
1400
- return
1401
-
1402
- # our columns to parse
1403
- convert_dates_list_bool = self .convert_dates
1404
- if isinstance (convert_dates_list_bool , bool ):
1405
- convert_dates_list_bool = []
1406
- convert_dates = set (convert_dates_list_bool )
1407
-
1408
- def is_ok (col ) -> bool :
1409
- """
1410
- Return if this col is ok to try for a date parse.
1411
- """
1412
- if col in convert_dates :
1413
- return True
1414
- if not self .keep_default_dates :
1415
- return False
1416
- if not isinstance (col , str ):
1417
- return False
1418
-
1419
- col_lower = col .lower ()
1420
- if (
1421
- col_lower .endswith (("_at" , "_time" ))
1422
- or col_lower == "modified"
1423
- or col_lower == "date"
1424
- or col_lower == "datetime"
1425
- or col_lower .startswith ("timestamp" )
1426
- ):
1427
- return True
1428
- return False
1429
1360
1430
- self ._process_converter (lambda col , c : self ._try_convert_to_date (c ), filt = is_ok )
1361
+ def _should_convert_dates (
1362
+ convert_dates : bool | list [str ],
1363
+ keep_default_dates : bool ,
1364
+ col : Hashable ,
1365
+ ) -> bool :
1366
+ """
1367
+ Return bool whether a DataFrame column should be cast to datetime.
1368
+ """
1369
+ if convert_dates is False :
1370
+ # convert_dates=True means follow keep_default_dates
1371
+ return False
1372
+ elif not isinstance (convert_dates , bool ) and col in set (convert_dates ):
1373
+ return True
1374
+ elif not keep_default_dates :
1375
+ return False
1376
+ elif not isinstance (col , str ):
1377
+ return False
1378
+ col_lower = col .lower ()
1379
+ if (
1380
+ col_lower .endswith (("_at" , "_time" ))
1381
+ or col_lower in {"modified" , "date" , "datetime" }
1382
+ or col_lower .startswith ("timestamp" )
1383
+ ):
1384
+ return True
1385
+ return False
0 commit comments