Skip to content

Commit 92bd9cb

Browse files
GH1033 Add overloads of engine for pd.read_json (#1035)
* GHXXX Add overloads of engine for pd.read_json
* GH1033 PR Feedback
* GH1033 PR Feedback
* GH1033 Fix ignore type
* GH1033 PR feedback
1 parent e610b76 commit 92bd9cb

File tree

2 files changed

+135
-1
lines changed

2 files changed

+135
-1
lines changed

pandas-stubs/io/json/_json.pyi

Lines changed: 105 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,10 +48,61 @@ def read_json(
4848
nrows: int | None = ...,
4949
storage_options: StorageOptions = ...,
5050
dtype_backend: DtypeBackend | NoDefault = ...,
51+
engine: Literal["ujson"] = ...,
5152
) -> JsonReader[Series]: ...
5253
@overload
5354
def read_json(
54-
path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
55+
path_or_buf: FilePath | ReadBuffer[bytes],
56+
*,
57+
orient: JsonSeriesOrient | None = ...,
58+
typ: Literal["series"],
59+
dtype: bool | Mapping[HashableT, DtypeArg] | None = ...,
60+
convert_axes: bool | None = ...,
61+
convert_dates: bool | list[str] = ...,
62+
keep_default_dates: bool = ...,
63+
precise_float: bool = ...,
64+
date_unit: TimeUnit | None = ...,
65+
encoding: str | None = ...,
66+
encoding_errors: (
67+
Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"]
68+
| None
69+
) = ...,
70+
lines: Literal[True],
71+
chunksize: int,
72+
compression: CompressionOptions = ...,
73+
nrows: int | None = ...,
74+
storage_options: StorageOptions = ...,
75+
dtype_backend: DtypeBackend | NoDefault = ...,
76+
engine: Literal["pyarrow"],
77+
) -> JsonReader[Series]: ...
78+
@overload
79+
def read_json(
80+
path_or_buf: FilePath | ReadBuffer[bytes],
81+
*,
82+
orient: JsonFrameOrient | None = ...,
83+
typ: Literal["frame"] = ...,
84+
dtype: bool | Mapping[HashableT, DtypeArg] | None = ...,
85+
convert_axes: bool | None = ...,
86+
convert_dates: bool | list[str] = ...,
87+
keep_default_dates: bool = ...,
88+
precise_float: bool = ...,
89+
date_unit: TimeUnit | None = ...,
90+
encoding: str | None = ...,
91+
encoding_errors: (
92+
Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"]
93+
| None
94+
) = ...,
95+
lines: Literal[True],
96+
chunksize: int,
97+
compression: CompressionOptions = ...,
98+
nrows: int | None = ...,
99+
storage_options: StorageOptions = ...,
100+
dtype_backend: DtypeBackend | NoDefault = ...,
101+
engine: Literal["ujson"] = ...,
102+
) -> JsonReader[DataFrame]: ...
103+
@overload
104+
def read_json(
105+
path_or_buf: FilePath | ReadBuffer[bytes],
55106
*,
56107
orient: JsonFrameOrient | None = ...,
57108
typ: Literal["frame"] = ...,
@@ -72,6 +123,7 @@ def read_json(
72123
nrows: int | None = ...,
73124
storage_options: StorageOptions = ...,
74125
dtype_backend: DtypeBackend | NoDefault = ...,
126+
engine: Literal["pyarrow"],
75127
) -> JsonReader[DataFrame]: ...
76128
@overload
77129
def read_json(
@@ -96,6 +148,32 @@ def read_json(
96148
nrows: int | None = ...,
97149
storage_options: StorageOptions = ...,
98150
dtype_backend: DtypeBackend | NoDefault = ...,
151+
engine: Literal["ujson"] = ...,
152+
) -> Series: ...
153+
@overload
154+
def read_json(
155+
path_or_buf: FilePath | ReadBuffer[bytes],
156+
*,
157+
orient: JsonSeriesOrient | None = ...,
158+
typ: Literal["series"],
159+
dtype: bool | Mapping[HashableT, DtypeArg] | None = ...,
160+
convert_axes: bool | None = ...,
161+
convert_dates: bool | list[str] = ...,
162+
keep_default_dates: bool = ...,
163+
precise_float: bool = ...,
164+
date_unit: TimeUnit | None = ...,
165+
encoding: str | None = ...,
166+
encoding_errors: (
167+
Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"]
168+
| None
169+
) = ...,
170+
lines: Literal[True],
171+
chunksize: None = ...,
172+
compression: CompressionOptions = ...,
173+
nrows: int | None = ...,
174+
storage_options: StorageOptions = ...,
175+
dtype_backend: DtypeBackend | NoDefault = ...,
176+
engine: Literal["pyarrow"],
99177
) -> Series: ...
100178
@overload
101179
def read_json(
@@ -120,6 +198,32 @@ def read_json(
120198
nrows: int | None = ...,
121199
storage_options: StorageOptions = ...,
122200
dtype_backend: DtypeBackend | NoDefault = ...,
201+
engine: Literal["ujson"] = ...,
202+
) -> DataFrame: ...
203+
@overload
204+
def read_json(
205+
path_or_buf: FilePath | ReadBuffer[bytes],
206+
*,
207+
orient: JsonFrameOrient | None = ...,
208+
typ: Literal["frame"] = ...,
209+
dtype: bool | Mapping[HashableT, DtypeArg] | None = ...,
210+
convert_axes: bool | None = ...,
211+
convert_dates: bool | list[str] = ...,
212+
keep_default_dates: bool = ...,
213+
precise_float: bool = ...,
214+
date_unit: TimeUnit | None = ...,
215+
encoding: str | None = ...,
216+
encoding_errors: (
217+
Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"]
218+
| None
219+
) = ...,
220+
lines: Literal[True],
221+
chunksize: None = ...,
222+
compression: CompressionOptions = ...,
223+
nrows: int | None = ...,
224+
storage_options: StorageOptions = ...,
225+
dtype_backend: DtypeBackend | NoDefault = ...,
226+
engine: Literal["pyarrow"],
123227
) -> DataFrame: ...
124228

125229
class JsonReader(abc.Iterator, Generic[NDFrameT]):

tests/test_io.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1714,3 +1714,33 @@ def test_read_excel_index_col() -> None:
17141714
),
17151715
pd.DataFrame,
17161716
)
1717+
1718+
1719+
def test_read_json_engine() -> None:
1720+
"""Test the engine argument for `pd.read_json` introduced with pandas 2.0."""
1721+
data = """{"index": {"0": 0, "1": 1},
1722+
"a": {"0": 1, "1": null},
1723+
"b": {"0": 2.5, "1": 4.5},
1724+
"c": {"0": true, "1": false},
1725+
"d": {"0": "a", "1": "b"},
1726+
"e": {"0": 1577.2, "1": 1577.1}}"""
1727+
check(
1728+
assert_type(pd.read_json(io.StringIO(data), engine="ujson"), pd.DataFrame),
1729+
pd.DataFrame,
1730+
)
1731+
1732+
data_lines = b"""{"col 1":"a","col 2":"b"}
1733+
{"col 1":"c","col 2":"d"}"""
1734+
dd = io.BytesIO(data_lines)
1735+
check(
1736+
assert_type(
1737+
pd.read_json(dd, lines=True, engine="pyarrow"),
1738+
pd.DataFrame,
1739+
),
1740+
pd.DataFrame,
1741+
)
1742+
1743+
if TYPE_CHECKING_INVALID_USAGE:
1744+
pd.read_json(dd, lines=False, engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue]
1745+
pd.read_json(io.StringIO(data), engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType]
1746+
pd.read_json(io.StringIO(data), lines=True, engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue]

0 commit comments

Comments (0)