Skip to content

Commit 4eec138

Browse files
committed
feat: bigframes.bigquery.json_extract
1 parent 2c81086 commit 4eec138

File tree

4 files changed

+89
-0
lines changed

4 files changed

+89
-0
lines changed

bigframes/bigquery/__init__.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,41 @@ def json_set(
208208
return series
209209

210210

211+
def json_extract(
    series: series.Series,
    json_path: str,
) -> series.Series:
    """Pulls the value located at a JSONPath out of each element of a Series.

    The extracted value is converted to a SQL JSON-formatted `STRING` or `JSON`
    value. Single quotes and brackets are used to escape invalid JSONPath
    characters in JSON keys.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> bpd.options.display.progress_bar = None

        >>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}'])
        >>> bbq.json_extract(s, json_path="$.class")
        0    "{\\\"students\\\":[{\\\"id\\\":5},{\\\"id\\\":12}]}"
        dtype: string

    Args:
        series (bigframes.series.Series):
            The Series holding the JSON data, either as native JSON objects or
            as JSON-formatted strings.
        json_path (str):
            The JSON path that identifies the data to obtain from the input.

    Returns:
        bigframes.series.Series: A new Series with the JSON or JSON-formatted STRING.
    """
    # Build the op first so the path is bound before it is applied element-wise.
    extract_op = ops.JSONExtract(json_path=json_path)
    return series._apply_unary_op(extract_op)
240+
241+
242+
# Search functions defined from
243+
# https://cloud.google.com/bigquery/docs/reference/standard-sql/search_functions
244+
245+
211246
def vector_search(
212247
base_table: str,
213248
column_to_search: str,

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,11 @@ def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet):
914914
).to_expr()
915915

916916

917+
@scalar_op_compiler.register_unary_op(ops.JSONExtract, pass_op=True)
def json_extract_op_impl(x: ibis_types.Value, op: ops.JSONExtract):
    """Compile a ``JSONExtract`` op by calling the ``json_extract`` builtin.

    The op instance is passed in (``pass_op=True``) so that its ``json_path``
    can be forwarded alongside the input value ``x``.
    """
    path = op.json_path
    return json_extract(json_obj=x, json_path=path)
920+
921+
917922
### Binary Ops
918923
def short_circuit_nulls(type_override: typing.Optional[ibis_dtypes.DataType] = None):
919924
"""Wraps a binary operator to generate nulls of the expected type if either input is a null scalar."""
@@ -1501,3 +1506,10 @@ def json_set(
15011506
json_obj: ibis_dtypes.JSON, json_path: ibis_dtypes.str, json_value
15021507
) -> ibis_dtypes.JSON:
15031508
"""Produces a new SQL JSON value with the specified JSON data inserted or replaced."""
1509+
1510+
1511+
@ibis.udf.scalar.builtin(name="json_extract")
def json_extract(
    json_obj: ibis_dtypes.JSON, json_path: ibis_dtypes.str
) -> ibis_dtypes.JSON:
    """Extracts a JSON value and converts it to a SQL JSON-formatted STRING or JSON value.

    This is a declaration-only stub: it has no Python body and is registered
    as a builtin scalar function under the name ``json_extract``.

    Args:
        json_obj: The JSON input to extract from.
        json_path: The JSONPath selecting the value to extract.

    NOTE(review): the return annotation is JSON even though the summary above
    mentions STRING results as well -- confirm this is intended for
    string-typed inputs.
    """

bigframes/operations/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,22 @@ def output_type(self, *input_types):
602602
return dtypes.STRING_DTYPE
603603

604604

605+
## JSON Ops
606+
@dataclasses.dataclass(frozen=True)
class JSONExtract(UnaryOp):
    """Unary op that extracts the value at ``json_path`` from a JSON-like input."""

    name: typing.ClassVar[str] = "json_extract"
    # JSONPath expression identifying the value to extract.
    json_path: str

    def output_type(self, *input_types):
        """Return the output dtype, which is the same as the input dtype.

        Args:
            *input_types: Input dtypes; only the first one is inspected.

        Returns:
            The first input type, unchanged.

        Raises:
            TypeError: If the first input type is not JSON-like according to
                ``dtypes.is_json_like``.
        """
        input_type = input_types[0]
        if not dtypes.is_json_like(input_type):
            raise TypeError(
                "Input type must be a valid JSON object or JSON-formatted string type."
                + f" Received type: {input_type}"
            )
        return input_type
619+
620+
605621
# Binary Ops
606622
fillna_op = create_binary_op(name="fillna", type_signature=op_typing.COERCE)
607623
maximum_op = create_binary_op(name="maximum", type_signature=op_typing.COERCE)

tests/system/small/bigquery/test_json.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,29 @@ def test_json_set_w_invalid_value_type():
110110
def test_json_set_w_invalid_series_type():
111111
with pytest.raises(TypeError):
112112
bbq.json_set(bpd.Series([1, 2]), json_path_value_pairs=[("$.a", 1)])
113+
114+
115+
def test_json_extract_from_json():
    """Extracting "$.a.b" from JSON input yields JSON output, with None where the path is absent."""
    json_series = _get_series_from_json(
        [{"a": {"b": [1, 2]}}, {"a": {"c": 1}}, {"a": {"b": 0}}]
    )
    result = bbq.json_extract(json_series, "$.a.b")
    # After the introduction of the JSON type, the output should be a JSON-formatted series.
    expected = _get_series_from_json(["[1,2]", None, "0"])
    pd.testing.assert_series_equal(result.to_pandas(), expected.to_pandas())
124+
125+
126+
def test_json_extract_from_string():
    """Extracting "$.a.b" from JSON-formatted strings yields strings, with None where the path is absent."""
    string_series = bpd.Series(
        ['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}']
    )
    result = bbq.json_extract(string_series, "$.a.b")
    expected = bpd.Series(["[1,2]", None, "0"])
    pd.testing.assert_series_equal(result.to_pandas(), expected.to_pandas())
134+
135+
136+
def test_json_extract_w_invalid_series_type():
    """A Series of integers is rejected with a TypeError."""
    int_series = bpd.Series([1, 2])
    with pytest.raises(TypeError):
        bbq.json_extract(int_series, "$.a")

0 commit comments

Comments
 (0)