googleapis
diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
Lines changed: 3 additions & 1 deletion
diff --git a/bigframes/bigquery/_operations/json.py b/bigframes/bigquery/_operations/json.py
Lines changed: 45 additions & 3 deletions
diff --git a/bigframes/core/bigframe_node.py b/bigframes/core/bigframe_node.py
Lines changed: 3 additions & 20 deletions
diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
Lines changed: 1 addition & 1 deletion
diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
Lines changed: 13 additions & 0 deletions
diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py
Lines changed: 10 additions & 2 deletions
diff --git a/bigframes/core/compile/sqlglot/scalar_compiler.py b/bigframes/core/compile/sqlglot/scalar_compiler.py
Lines changed: 7 additions & 0 deletions
diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py
Lines changed: 70 additions & 4 deletions
@@ -37,6 +37,7 @@
     json_extract,
     json_extract_array,
     json_extract_string_array,
+    json_query,
     json_set,
     json_value,
     parse_json,
@@ -58,10 +59,11 @@
     "st_distance",
     "st_intersection",
     # json ops
-    "json_set",
     "json_extract",
     "json_extract_array",
     "json_extract_string_array",
+    "json_query",
+    "json_set",
     "json_value",
     "parse_json",
     # search ops
 
@@ -22,9 +22,11 @@
 from __future__ import annotations
 
 from typing import Any, cast, Optional, Sequence, Tuple, Union
+import warnings
 
 import bigframes.core.utils as utils
 import bigframes.dtypes
+import bigframes.exceptions as bfe
 import bigframes.operations as ops
 import bigframes.series as series
 
@@ -87,9 +89,13 @@ def json_extract(
     input: series.Series,
     json_path: str,
 ) -> series.Series:
-    """Extracts a JSON value and converts it to a SQL JSON-formatted `STRING` or `JSON`
-    value. This function uses single quotes and brackets to escape invalid JSONPath
-    characters in JSON keys.
+    """Extracts a JSON value and converts it to a SQL JSON-formatted ``STRING`` or
+    ``JSON`` value. This function uses single quotes and brackets to escape invalid
+    JSONPath characters in JSON keys.
+
+    .. deprecated:: 2.5.0
+        The ``json_extract`` is deprecated and will be removed in a future version.
+        Use ``json_query`` instead.
 
     **Examples:**
 
@@ -111,6 +117,11 @@ def json_extract(
     Returns:
         bigframes.series.Series: A new Series with the JSON or JSON-formatted STRING.
     """
+    msg = (
+        "The `json_extract` is deprecated and will be removed in a future version. "
+        "Use `json_query` instead."
+    )
+    warnings.warn(bfe.format_message(msg), category=UserWarning)
     return input._apply_unary_op(ops.JSONExtract(json_path=json_path))
 
 
@@ -231,6 +242,37 @@ def json_extract_string_array(
     return array_series
 
 
+def json_query(
+    input: series.Series,
+    json_path: str,
+) -> series.Series:
+    """Extracts a JSON value and converts it to a SQL JSON-formatted ``STRING``
+    or ``JSON`` value. This function uses double quotes to escape invalid JSONPath
+    characters in JSON keys. For example: ``"a.b"``.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> bpd.options.display.progress_bar = None
+
+        >>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}'])
+        >>> bbq.json_query(s, json_path="$.class")
+        0    {"students":[{"id":5},{"id":12}]}
+        dtype: string
+
+    Args:
+        input (bigframes.series.Series):
+            The Series containing JSON data (as native JSON objects or JSON-formatted strings).
+        json_path (str):
+            The JSON path identifying the data that you want to obtain from the input.
+
+    Returns:
+        bigframes.series.Series: A new Series with the JSON or JSON-formatted STRING.
+    """
+    return input._apply_unary_op(ops.JSONQuery(json_path=json_path))
+
+
 def json_value(
     input: series.Series,
     json_path: str,
 
@@ -22,7 +22,7 @@
 import typing
 from typing import Callable, Dict, Generator, Iterable, Mapping, Sequence, Set, Tuple
 
-from bigframes.core import identifiers
+from bigframes.core import field, identifiers
 import bigframes.core.schema as schemata
 import bigframes.dtypes
 
@@ -34,23 +34,6 @@
 T = typing.TypeVar("T")
 
 
-@dataclasses.dataclass(frozen=True)
-class Field:
-    id: identifiers.ColumnId
-    dtype: bigframes.dtypes.Dtype
-    # Best effort, nullable=True if not certain
-    nullable: bool = True
-
-    def with_nullable(self) -> Field:
-        return Field(self.id, self.dtype, nullable=True)
-
-    def with_nonnull(self) -> Field:
-        return Field(self.id, self.dtype, nullable=False)
-
-    def with_id(self, id: identifiers.ColumnId) -> Field:
-        return Field(id, self.dtype, nullable=self.nullable)
-
-
 @dataclasses.dataclass(eq=False, frozen=True)
 class BigFrameNode:
     """
@@ -162,7 +145,7 @@ def roots(self) -> typing.Set[BigFrameNode]:
     # TODO: Store some local data lazily for select, aggregate nodes.
     @property
     @abc.abstractmethod
-    def fields(self) -> Sequence[Field]:
+    def fields(self) -> Sequence[field.Field]:
         ...
 
     @property
@@ -292,7 +275,7 @@ def _dtype_lookup(self) -> dict[identifiers.ColumnId, bigframes.dtypes.Dtype]:
         return {field.id: field.dtype for field in self.fields}
 
     @functools.cached_property
-    def field_by_id(self) -> Mapping[identifiers.ColumnId, Field]:
+    def field_by_id(self) -> Mapping[identifiers.ColumnId, field.Field]:
         return {field.id: field for field in self.fields}
 
     # Plan algorithms
 
@@ -2166,7 +2166,7 @@ def merge(
                 result_columns.append(get_column_left[col_id])
         for col_id in other.value_columns:
             if col_id in right_join_ids:
-                if other.col_id_to_label[matching_right_id] in matching_join_labels:
+                if other.col_id_to_label[col_id] in matching_join_labels:
                     pass
                 else:
                     result_columns.append(get_column_right[col_id])
 
@@ -1356,6 +1356,19 @@ def json_extract_string_array_op_impl(
     return json_extract_string_array(json_obj=x, json_path=op.json_path)
 
 
+@scalar_op_compiler.register_unary_op(ops.JSONQuery, pass_op=True)
+def json_query_op_impl(x: ibis_types.Value, op: ops.JSONQuery):
+    # Declare a builtin-function stub (not a user-defined function) whose return type dynamically matches the input's type.
+    def json_query(json_or_json_string, json_path: ibis_dtypes.str):  # type: ignore
+        """Extracts a JSON value and converts it to a SQL JSON-formatted STRING or JSON value."""
+        ...
+
+    return_type = x.type()
+    json_query.__annotations__["return"] = return_type
+    json_query_op = ibis_udf.scalar.builtin(json_query)
+    return json_query_op(json_or_json_string=x, json_path=op.json_path)
+
+
 @scalar_op_compiler.register_unary_op(ops.ParseJSON, pass_op=True)
 def parse_json_op_impl(x: ibis_types.Value, op: ops.ParseJSON):
     return parse_json(json_str=x)
 
@@ -26,6 +26,7 @@
 import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
 import bigframes.core.compile.sqlglot.sqlglot_ir as ir
 import bigframes.core.ordering as bf_ordering
+from bigframes.core.rewrite import schema_binding
 
 
 class SQLGlotCompiler:
@@ -120,7 +121,14 @@ def _remap_variables(self, node: nodes.ResultNode) -> nodes.ResultNode:
 
     def _compile_result_node(self, root: nodes.ResultNode) -> str:
         sqlglot_ir = self.compile_node(root.child)
-        # TODO: add order_by, limit, and selections to sqlglot_expr
+
+        selected_cols: tuple[tuple[str, sge.Expression], ...] = tuple(
+            (name, scalar_compiler.compile_scalar_expression(ref))
+            for ref, name in root.output_cols
+        )
+        sqlglot_ir = sqlglot_ir.select(selected_cols)
+
+        # TODO: add order_by, limit to sqlglot_expr
         return sqlglot_ir.sql
 
     @functools.lru_cache(maxsize=5000)
@@ -176,6 +184,6 @@ def compile_projection(
 
 def _replace_unsupported_ops(node: nodes.BigFrameNode):
     node = nodes.bottom_up(node, rewrite.rewrite_slice)
-    node = nodes.bottom_up(node, rewrite.rewrite_timedelta_expressions)
+    node = nodes.bottom_up(node, schema_binding.bind_schema_to_expressions)
     node = nodes.bottom_up(node, rewrite.rewrite_range_rolling)
     return node
@@ -35,6 +35,13 @@ def compile_deref_expression(expr: expression.DerefOp) -> sge.Expression:
     return sge.ColumnDef(this=sge.to_identifier(expr.id.sql, quoted=True))
 
 
+@compile_scalar_expression.register
+def compile_field_ref_expression(
+    expr: expression.SchemaFieldRefExpression,
+) -> sge.Expression:
+    return sge.ColumnDef(this=sge.to_identifier(expr.field.id.sql, quoted=True))
+
+
 @compile_scalar_expression.register
 def compile_constant_expression(
     expr: expression.ScalarConstantExpression,
 
@@ -128,15 +128,22 @@ def select(
         self,
         selected_cols: tuple[tuple[str, sge.Expression], ...],
     ) -> SQLGlotIR:
-        cols_expr = [
+        selections = [
             sge.Alias(
                 this=expr,
                 alias=sge.to_identifier(id, quoted=self.quoted),
             )
             for id, expr in selected_cols
         ]
-        new_expr = self._encapsulate_as_cte().select(*cols_expr, append=False)
-        return SQLGlotIR(expr=new_expr)
+        # Attempts to simplify selected columns when the original and new column
+        # names are simply aliases of each other.
+        squashed_selections = _squash_selections(self.expr.expressions, selections)
+        if squashed_selections != []:
+            new_expr = self.expr.select(*squashed_selections, append=False)
+            return SQLGlotIR(expr=new_expr)
+        else:
+            new_expr = self._encapsulate_as_cte().select(*selections, append=False)
+            return SQLGlotIR(expr=new_expr)
 
     def project(
         self,
@@ -199,7 +206,7 @@ def _encapsulate_as_cte(
             this=select_expr,
             alias=new_cte_name,
         )
-        new_with_clause = sge.With(expressions=existing_ctes + [new_cte])
+        new_with_clause = sge.With(expressions=[*existing_ctes, new_cte])
         new_select_expr = (
             sge.Select().select(sge.Star()).from_(sge.Table(this=new_cte_name))
         )
@@ -254,3 +261,62 @@ def _table(table: bigquery.TableReference) -> sge.Table:
         db=sg.to_identifier(table.dataset_id, quoted=True),
         catalog=sg.to_identifier(table.project, quoted=True),
     )
+
+
+def _squash_selections(
+    old_expr: list[sge.Expression], new_expr: list[sge.Alias]
+) -> list[sge.Alias]:
+    """
+    Simplifies the select column expressions if existing (old_expr) and
+    new (new_expr) selected columns are both simple aliases of column definitions.
+    Returns an empty list when the selections cannot be squashed.
+
+    Example:
+    old_expr: [A AS X, B AS Y]
+    new_expr: [X AS P, Y AS Q]
+    Result:   [A AS P, B AS Q]
+    """
+    old_alias_map: typing.Dict[str, str] = {}
+    for selected in old_expr:
+        column_alias_pair = _get_column_alias_pair(selected)
+        if column_alias_pair is None:
+            return []
+        else:
+            old_alias_map[column_alias_pair[1]] = column_alias_pair[0]
+
+    new_selected_cols: typing.List[sge.Alias] = []
+    for selected in new_expr:
+        column_alias_pair = _get_column_alias_pair(selected)
+        if column_alias_pair is None or column_alias_pair[0] not in old_alias_map:
+            return []
+        else:
+            new_alias_expr = sge.Alias(
+                this=sge.ColumnDef(
+                    this=sge.to_identifier(
+                        old_alias_map[column_alias_pair[0]], quoted=True
+                    )
+                ),
+                alias=sg.to_identifier(column_alias_pair[1], quoted=True),
+            )
+            new_selected_cols.append(new_alias_expr)
+    return new_selected_cols
+
+
+def _get_column_alias_pair(
+    expr: sge.Expression,
+) -> typing.Optional[typing.Tuple[str, str]]:
+    """Checks if an expression is a simple alias of a column definition
+    (e.g., "column_name AS alias_name").
+    If it is, returns a tuple containing the original column name and alias name.
+    Returns `None` otherwise.
+    """
+    if not isinstance(expr, sge.Alias):
+        return None
+    if not isinstance(expr.this, sge.ColumnDef):
+        return None
+
+    column_def_expr: sge.ColumnDef = expr.this
+    if not isinstance(column_def_expr.this, sge.Identifier):
+        return None
+
+    original_identifier: sge.Identifier = column_def_expr.this
+    return (original_identifier.this, expr.alias)