read_json engine keyword and pyarrow integration #49249

Merged: 31 commits, Feb 10, 2023
Commits (31 total; the diff below shows changes from 19 commits):
6aa72ca
ENH: Add engine keyword to read_json to enable reading from pyarrow #…
abkosar Dec 13, 2022
c248b84
moved argument to the end of signature, fixed elifs
abkosar Jan 20, 2023
91b81cf
Adding finally
abkosar Jan 20, 2023
9020663
Updated the _make_engine try-finally block
abkosar Jan 25, 2023
0388c4e
Fixing merge conflicts
abkosar Dec 13, 2022
6911791
Merge branch 'main' into main
abkosar Jan 26, 2023
5ecd3e0
Merge branch 'pandas-dev:main' into main
abkosar Jan 27, 2023
c660395
Refactored pyarrow engine code
abkosar Dec 13, 2022
54570ca
Merge branch 'pandas-dev:main' into main
abkosar Jan 29, 2023
eb709e7
Refactored pyarrow implementation to inline
abkosar Dec 13, 2022
0a9c841
Merge branch 'pandas-dev:main' into main
abkosar Feb 1, 2023
7eccd83
Small refactors
abkosar Feb 1, 2023
986d8cb
Merge branch 'pandas-dev:main' into main
abkosar Feb 1, 2023
b4409bb
Fixing double lines
abkosar Feb 1, 2023
0467fe8
Merge branch 'pandas-dev:main' into main
abkosar Feb 1, 2023
569ab9b
Merge branch 'pandas-dev:main' into main
abkosar Feb 1, 2023
4dc9adc
Merge branch 'pandas-dev:main' into main
abkosar Feb 2, 2023
38bc7db
- Added the logic for skipping test if pyarrow is not installed.
abkosar Feb 2, 2023
bed15df
Added else statement to conftest engine
abkosar Feb 2, 2023
a29e96a
Merge branch 'main' into main
abkosar Feb 2, 2023
cdfd747
- removed xfail decorators
abkosar Feb 2, 2023
c70c0b4
Merge branch 'main' into test-fixes
abkosar Feb 2, 2023
fe2b3ef
Merge branch 'main' into main
abkosar Feb 3, 2023
228ca64
Merge branch 'main' into main
abkosar Feb 3, 2023
d1acc94
Merge branch 'pandas-dev:main' into main
abkosar Feb 4, 2023
0885f07
Merge remote-tracking branch 'upstream/main' into abkosar/main
mroeschke Feb 9, 2023
ab7af44
add whatsnew, address comments
mroeschke Feb 9, 2023
c9cde9e
Merge remote-tracking branch 'upstream/main' into abkosar/main
mroeschke Feb 9, 2023
8c96553
address review
mroeschke Feb 9, 2023
9cbf598
Add note about param
mroeschke Feb 9, 2023
c59310b
Add test with lines=false
mroeschke Feb 10, 2023
3 changes: 3 additions & 0 deletions pandas/_typing.py
@@ -324,6 +324,9 @@ def closed(self) -> bool:
# read_csv engines
CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"]

# read_json engines
JSONEngine = Literal["ujson", "pyarrow"]

# read_xml parsers
XMLParsers = Literal["lxml", "etree"]

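For context, a minimal sketch (not part of the diff) of how a Literal alias like the one added above constrains call sites under a static type checker; the stub function is purely illustrative:

from typing import Literal

JSONEngine = Literal["ujson", "pyarrow"]  # mirrors the alias added in pandas/_typing.py

def read_json_stub(engine: JSONEngine = "ujson") -> None:
    # mypy/pyright flag any value outside the two literals,
    # e.g. read_json_stub(engine="json") is a type error.
    ...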
63 changes: 49 additions & 14 deletions pandas/io/json/_json.py
Expand Up @@ -34,11 +34,13 @@
DtypeArg,
FilePath,
IndexLabel,
JSONEngine,
JSONSerializable,
ReadBuffer,
StorageOptions,
WriteBuffer,
)
from pandas.compat._optional import import_optional_dependency
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc

@@ -401,6 +403,7 @@ def read_json(
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    use_nullable_dtypes: bool = ...,
    engine: JSONEngine = ...,
) -> JsonReader[Literal["frame"]]:
    ...

@@ -425,6 +428,7 @@ def read_json(
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    use_nullable_dtypes: bool = ...,
    engine: JSONEngine = ...,
) -> JsonReader[Literal["series"]]:
    ...

@@ -449,6 +453,7 @@ def read_json(
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    use_nullable_dtypes: bool = ...,
    engine: JSONEngine = ...,
) -> Series:
    ...

@@ -473,6 +478,7 @@ def read_json(
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    use_nullable_dtypes: bool = ...,
    engine: JSONEngine = ...,
) -> DataFrame:
    ...

@@ -500,6 +506,7 @@ def read_json(
    nrows: int | None = None,
    storage_options: StorageOptions = None,
    use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
    engine: JSONEngine = "ujson",
) -> DataFrame | Series | JsonReader:
    """
    Convert a JSON string to pandas object.
@@ -653,6 +660,9 @@ def read_json(

        .. versionadded:: 2.0

    engine : {{'ujson', 'pyarrow'}}, default "ujson"
        Parser engine to use.

    Returns
    -------
    Series or DataFrame
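For readers skimming the diff, a hedged usage sketch of the new keyword (the file path is hypothetical; pyarrow must be installed, and per the validation added further down the pyarrow engine requires lines=True and does not accept chunksize):

import pandas as pd

# Hypothetical line-delimited (NDJSON) file: one JSON record per line.
df = pd.read_json("data.jsonl", lines=True, engine="pyarrow")

# The default behavior is unchanged:
df_default = pd.read_json("data.jsonl", lines=True)  # engine="ujson"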
@@ -771,6 +781,7 @@ def read_json(
        storage_options=storage_options,
        encoding_errors=encoding_errors,
        use_nullable_dtypes=use_nullable_dtypes,
        engine=engine,
    )

    if chunksize:
@@ -807,6 +818,7 @@ def __init__(
        storage_options: StorageOptions = None,
        encoding_errors: str | None = "strict",
        use_nullable_dtypes: bool = False,
        engine: JSONEngine = "ujson",
    ) -> None:

        self.orient = orient
@@ -818,6 +830,7 @@ def __init__(
        self.precise_float = precise_float
        self.date_unit = date_unit
        self.encoding = encoding
        self.engine = engine
        self.compression = compression
        self.storage_options = storage_options
        self.lines = lines
@@ -832,13 +845,30 @@ def __init__(
            self.chunksize = validate_integer("chunksize", self.chunksize, 1)
            if not self.lines:
                raise ValueError("chunksize can only be passed if lines=True")
            if self.engine == "pyarrow":
                raise ValueError(
                    "currently pyarrow engine doesn't support chunksize parameter"
                )
        if self.nrows is not None:
            self.nrows = validate_integer("nrows", self.nrows, 0)
            if not self.lines:
                raise ValueError("nrows can only be passed if lines=True")
        if self.engine == "pyarrow":
            if not self.lines:
                raise ValueError(
                    "currently pyarrow engine only supports "
                    "the line-delimited JSON format"
                )
        if self.engine not in {"pyarrow", "ujson"}:
            raise ValueError(
                f"The engine type {self.engine} is currently not supported."
            )

        data = self._get_data_from_filepath(filepath_or_buffer)
        self.data = self._preprocess_data(data)
        if self.engine == "pyarrow":
            self.data = filepath_or_buffer
        elif self.engine == "ujson":
            data = self._get_data_from_filepath(filepath_or_buffer)
            self.data = self._preprocess_data(data)

    def _preprocess_data(self, data):
        """
@@ -923,19 +953,24 @@ def read(self) -> DataFrame | Series:
        """
        obj: DataFrame | Series
        with self:
            if self.lines:
                if self.chunksize:
                    obj = concat(self)
                elif self.nrows:
                    lines = list(islice(self.data, self.nrows))
                    lines_json = self._combine_lines(lines)
                    obj = self._get_object_parser(lines_json)
            if self.engine == "pyarrow":
                pyarrow_json = import_optional_dependency("pyarrow.json")
                table = pyarrow_json.read_json(self.data)
                obj = table.to_pandas()
            elif self.engine == "ujson":
                if self.lines:
                    if self.chunksize:
                        obj = concat(self)
                    elif self.nrows:
                        lines = list(islice(self.data, self.nrows))
                        lines_json = self._combine_lines(lines)
                        obj = self._get_object_parser(lines_json)
                    else:
                        data = ensure_str(self.data)
                        data_lines = data.split("\n")
                        obj = self._get_object_parser(self._combine_lines(data_lines))
                else:
                    data = ensure_str(self.data)
                    data_lines = data.split("\n")
                    obj = self._get_object_parser(self._combine_lines(data_lines))
            else:
                obj = self._get_object_parser(self.data)
                    obj = self._get_object_parser(self.data)
        if self.use_nullable_dtypes:
            return obj.convert_dtypes(infer_objects=False)
        else:
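As a rough standalone sketch of what the new pyarrow branch in read() does (the file name is hypothetical; pyarrow must be installed):

from pandas.compat._optional import import_optional_dependency

# Same pattern as the diff: raises an informative ImportError if pyarrow is missing.
pyarrow_json = import_optional_dependency("pyarrow.json")

table = pyarrow_json.read_json("data.jsonl")  # parse line-delimited JSON into a pyarrow.Table
df = table.to_pandas()                        # convert to a pandas DataFrame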
28 changes: 28 additions & 0 deletions pandas/tests/io/json/conftest.py
@@ -7,3 +7,31 @@ def orient(request):
    Fixture for orients excluding the table format.
    """
    return request.param


@pytest.fixture
def json_dir_path(datapath):
    """
    The directory path to the data files needed for parser tests.
    """
    return datapath("io", "json", "data")


@pytest.fixture(params=["ujson", "pyarrow"])
def engine(request):
    if request.param == "pyarrow":
        pytest.importorskip("pyarrow.json")
        return request.param
Review comment (Member):
Nit: Instead of the else, could you do

if request.param == "pyarrow":
    pytest.importorskip(...)
return request.param

    else:
        return request.param
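For clarity, a sketch of the full fixture as the reviewer suggests it, with the else branch dropped and behavior unchanged:

import pytest

@pytest.fixture(params=["ujson", "pyarrow"])
def engine(request):
    # Skip the pyarrow parametrization entirely when pyarrow isn't installed.
    if request.param == "pyarrow":
        pytest.importorskip("pyarrow.json")
    return request.param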


@pytest.fixture
def json_engine_pyarrow_xfail(request):
    """
    Fixture that xfails a test if the engine is pyarrow.
    """
    engine = request.getfixturevalue("engine")
    if engine == "pyarrow":
        mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
Review comment (Member):
Yeah I would prefer this to be added to each xfailed test so the reason more clearly describes the exact thing that is not supported

Reply (abkosar, Contributor and PR author, Jan 16, 2023):

@mroeschke I added a comment to each xfailed test. Is there another special way to do it?

Reply (Member):

def test_foo(request, ...):
    if engine == "pyarrow":
        request.node.add_marker(pytest.mark.xfail(reason="the comment"))

        request.node.add_marker(mark)
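A fuller sketch of the per-test pattern being suggested; the test name, body, and xfail reason are illustrative only:

import pytest

def test_read_json_unsupported_option(request, engine):
    if engine == "pyarrow":
        # Name the exact unsupported behavior in the reason.
        request.node.add_marker(
            pytest.mark.xfail(reason="pyarrow engine does not support this option")
        )
    # Placeholder body; a real test would exercise the unsupported option here.
    assert engine in ("ujson", "pyarrow")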