Skip to content

Commit f08b5b6

Browse files
authored
Merge branch 'main' into local_series_index
2 parents 7a5a559 + e9fe815 commit f08b5b6

File tree

24 files changed

+995
-204
lines changed

24 files changed

+995
-204
lines changed

bigframes/bigquery/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
unix_millis,
2828
unix_seconds,
2929
)
30-
from bigframes.bigquery._operations.geo import st_area
30+
from bigframes.bigquery._operations.geo import st_area, st_difference
3131
from bigframes.bigquery._operations.json import (
3232
json_extract,
3333
json_extract_array,
@@ -48,6 +48,7 @@
4848
"array_to_string",
4949
# geo ops
5050
"st_area",
51+
"st_difference",
5152
# json ops
5253
"json_set",
5354
"json_extract",

bigframes/bigquery/_operations/geo.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import annotations
1616

1717
from bigframes import operations as ops
18+
import bigframes.dtypes
1819
import bigframes.geopandas
1920
import bigframes.series
2021

@@ -91,3 +92,122 @@ def st_area(series: bigframes.series.Series) -> bigframes.series.Series:
9192
series = series._apply_unary_op(ops.geo_area_op)
9293
series.name = None
9394
return series
95+
96+
97+
def st_difference(
98+
series: bigframes.series.Series, other: bigframes.series.Series
99+
) -> bigframes.series.Series:
100+
"""
101+
Returns a GEOGRAPHY that represents the point set difference of
102+
`geography_1` and `geography_2`. Therefore, the result consists of the part
103+
of `geography_1` that doesn't intersect with `geography_2`.
104+
105+
If `geography_1` is completely contained in `geography_2`, then ST_DIFFERENCE
106+
returns an empty GEOGRAPHY.
107+
108+
.. note::
109+
BigQuery's Geography functions, like `st_difference`, interpret the geometry
110+
data type as a point set on the Earth's surface. A point set is a set
111+
of points, lines, and polygons on the WGS84 reference spheroid, with
112+
geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data
113+
114+
**Examples:**
115+
116+
>>> import bigframes as bpd
117+
>>> import bigframes.bigquery as bbq
118+
>>> import bigframes.geopandas
119+
>>> from shapely.geometry import Polygon, LineString, Point
120+
>>> bpd.options.display.progress_bar = None
121+
122+
We can check two GeoSeries against each other, row by row.
123+
124+
>>> s1 = bigframes.geopandas.GeoSeries(
125+
... [
126+
... Polygon([(0, 0), (2, 2), (0, 2)]),
127+
... Polygon([(0, 0), (2, 2), (0, 2)]),
128+
... LineString([(0, 0), (2, 2)]),
129+
... LineString([(2, 0), (0, 2)]),
130+
... Point(0, 1),
131+
... ],
132+
... )
133+
>>> s2 = bigframes.geopandas.GeoSeries(
134+
... [
135+
... Polygon([(0, 0), (1, 1), (0, 1)]),
136+
... LineString([(1, 0), (1, 3)]),
137+
... LineString([(2, 0), (0, 2)]),
138+
... Point(1, 1),
139+
... Point(0, 1),
140+
... ],
141+
... index=range(1, 6),
142+
... )
143+
144+
>>> s1
145+
0 POLYGON ((0 0, 2 2, 0 2, 0 0))
146+
1 POLYGON ((0 0, 2 2, 0 2, 0 0))
147+
2 LINESTRING (0 0, 2 2)
148+
3 LINESTRING (2 0, 0 2)
149+
4 POINT (0 1)
150+
dtype: geometry
151+
152+
>>> s2
153+
1 POLYGON ((0 0, 1 1, 0 1, 0 0))
154+
2 LINESTRING (1 0, 1 3)
155+
3 LINESTRING (2 0, 0 2)
156+
4 POINT (1 1)
157+
5 POINT (0 1)
158+
dtype: geometry
159+
160+
>>> bbq.st_difference(s1, s2)
161+
0 None
162+
1 POLYGON ((0.99954 1, 2 2, 0 2, 0 1, 0.99954 1))
163+
2 LINESTRING (0 0, 1 1.00046, 2 2)
164+
3 GEOMETRYCOLLECTION EMPTY
165+
4 POINT (0 1)
166+
5 None
167+
dtype: geometry
168+
169+
We can also check difference of single shapely geometries:
170+
171+
>>> sbq1 = bigframes.geopandas.GeoSeries(
172+
... [
173+
... Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])
174+
... ]
175+
... )
176+
>>> sbq2 = bigframes.geopandas.GeoSeries(
177+
... [
178+
... Polygon([(4, 2), (6, 2), (8, 6), (4, 2)])
179+
... ]
180+
... )
181+
182+
>>> sbq1
183+
0 POLYGON ((0 0, 10 0, 10 10, 0 0))
184+
dtype: geometry
185+
186+
>>> sbq2
187+
0 POLYGON ((4 2, 6 2, 8 6, 4 2))
188+
dtype: geometry
189+
190+
>>> bbq.st_difference(sbq1, sbq2)
191+
0 POLYGON ((0 0, 10 0, 10 10, 0 0), (8 6, 6 2, 4...
192+
dtype: geometry
193+
194+
Additionally, we can check difference of a GeoSeries against a single shapely geometry:
195+
196+
>>> bbq.st_difference(s1, sbq2)
197+
0 POLYGON ((0 0, 2 2, 0 2, 0 0))
198+
1 None
199+
2 None
200+
3 None
201+
4 None
202+
dtype: geometry
203+
204+
Args:
205+
other (bigframes.series.Series or geometric object):
206+
The GeoSeries (elementwise) or geometric object to find the difference to.
207+
208+
Returns:
209+
bigframes.series.Series:
210+
A GeoSeries of the points in each aligned geometry that are not
211+
in other.
212+
"""
213+
return series._apply_binary_op(other, ops.geo_st_difference_op)

bigframes/blob/_functions.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,9 @@ def _output_bq_type(self):
6868

6969
def _create_udf(self):
7070
"""Create Python UDF in BQ. Return name of the UDF."""
71-
udf_name = str(self._session._loader._storage_manager._random_table())
71+
udf_name = str(
72+
self._session._loader._storage_manager.generate_unique_resource_id()
73+
)
7274

7375
func_body = inspect.getsource(self._func)
7476
func_name = self._func.__name__

bigframes/clients.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -94,16 +94,24 @@ def create_bq_connection(
9494
# https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#grant_permission_on_function
9595
self._ensure_iam_binding(project_id, service_account_id, iam_role)
9696

97-
# Introduce retries to accommodate transient errors like etag mismatch,
98-
# which can be caused by concurrent operation on the same resource, and
99-
# manifests with message like:
100-
# google.api_core.exceptions.Aborted: 409 There were concurrent policy
101-
# changes. Please retry the whole read-modify-write with exponential
102-
# backoff. The request's ETag '\007\006\003,\264\304\337\272' did not match
103-
# the current policy's ETag '\007\006\003,\3750&\363'.
97+
# Introduce retries to accommodate transient errors like:
98+
# (1) Etag mismatch,
99+
# which can be caused by concurrent operation on the same resource, and
100+
# manifests with message like:
101+
# google.api_core.exceptions.Aborted: 409 There were concurrent policy
102+
# changes. Please retry the whole read-modify-write with exponential
103+
# backoff. The request's ETag '\007\006\003,\264\304\337\272' did not
104+
# match the current policy's ETag '\007\006\003,\3750&\363'.
105+
# (2) Connection creation,
106+
# for which sometimes it takes a bit for its service account to reflect
107+
# across APIs (e.g. b/397662004, b/386838767), before which, an attempt
108+
# to set an IAM policy for the service account may throw an error like:
109+
# google.api_core.exceptions.InvalidArgument: 400 Service account
110+
# bqcx-*@gcp-sa-bigquery-condel.iam.gserviceaccount.com does not exist.
104111
@google.api_core.retry.Retry(
105112
predicate=google.api_core.retry.if_exception_type(
106-
google.api_core.exceptions.Aborted
113+
google.api_core.exceptions.Aborted,
114+
google.api_core.exceptions.InvalidArgument,
107115
),
108116
initial=10,
109117
maximum=20,

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,11 +1001,6 @@ def normalize_op_impl(x: ibis_types.Value):
10011001

10021002

10031003
# Geo Ops
1004-
@scalar_op_compiler.register_unary_op(ops.geo_st_boundary_op, pass_op=False)
1005-
def geo_st_boundary_op_impl(x: ibis_types.Value):
1006-
return st_boundary(x)
1007-
1008-
10091004
@scalar_op_compiler.register_unary_op(ops.geo_area_op)
10101005
def geo_area_op_impl(x: ibis_types.Value):
10111006
return typing.cast(ibis_types.GeoSpatialValue, x).area()
@@ -1016,6 +1011,18 @@ def geo_st_astext_op_impl(x: ibis_types.Value):
10161011
return typing.cast(ibis_types.GeoSpatialValue, x).as_text()
10171012

10181013

1014+
@scalar_op_compiler.register_unary_op(ops.geo_st_boundary_op, pass_op=False)
1015+
def geo_st_boundary_op_impl(x: ibis_types.Value):
1016+
return st_boundary(x)
1017+
1018+
1019+
@scalar_op_compiler.register_binary_op(ops.geo_st_difference_op, pass_op=False)
1020+
def geo_st_difference_op_impl(x: ibis_types.Value, y: ibis_types.Value):
1021+
return typing.cast(ibis_types.GeoSpatialValue, x).difference(
1022+
typing.cast(ibis_types.GeoSpatialValue, y)
1023+
)
1024+
1025+
10191026
@scalar_op_compiler.register_unary_op(ops.geo_st_geogfromtext_op)
10201027
def geo_st_geogfromtext_op_impl(x: ibis_types.Value):
10211028
# Ibis doesn't seem to provide a dedicated method to cast from string to geography,

bigframes/core/utils.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@
1818
from typing import Hashable, Iterable, List
1919
import warnings
2020

21+
import bigframes_vendored.constants as constants
2122
import bigframes_vendored.pandas.io.common as vendored_pandas_io_common
2223
import numpy as np
2324
import pandas as pd
2425
import pandas.api.types as pdtypes
26+
import pyarrow as pa
2527
import typing_extensions
2628

2729
import bigframes.dtypes as dtypes
@@ -243,6 +245,22 @@ def replace_timedeltas_with_micros(dataframe: pd.DataFrame) -> List[str]:
243245
return updated_columns
244246

245247

248+
def _search_for_nested_json_type(arrow_type: pa.DataType) -> bool:
249+
"""
250+
Searches recursively for JSON array type within a PyArrow DataType.
251+
"""
252+
if arrow_type == dtypes.JSON_ARROW_TYPE:
253+
return True
254+
if pa.types.is_list(arrow_type):
255+
return _search_for_nested_json_type(arrow_type.value_type)
256+
if pa.types.is_struct(arrow_type):
257+
for i in range(arrow_type.num_fields):
258+
if _search_for_nested_json_type(arrow_type.field(i).type):
259+
return True
260+
return False
261+
return False
262+
263+
246264
def replace_json_with_string(dataframe: pd.DataFrame) -> List[str]:
247265
"""
248266
Due to a BigQuery IO limitation with loading JSON from Parquet files (b/374784249),
@@ -253,12 +271,27 @@ def replace_json_with_string(dataframe: pd.DataFrame) -> List[str]:
253271
updated_columns = []
254272

255273
for col in dataframe.columns:
256-
if dataframe[col].dtype == dtypes.JSON_DTYPE:
274+
column_type = dataframe[col].dtype
275+
if column_type == dtypes.JSON_DTYPE:
257276
dataframe[col] = dataframe[col].astype(dtypes.STRING_DTYPE)
258277
updated_columns.append(col)
278+
elif isinstance(column_type, pd.ArrowDtype) and _search_for_nested_json_type(
279+
column_type.pyarrow_dtype
280+
):
281+
raise NotImplementedError(
282+
f"Nested JSON types, found in column `{col}`: `{column_type}`, "
283+
f"are currently unsupported for upload. {constants.FEEDBACK_LINK}"
284+
)
259285

260286
if dataframe.index.dtype == dtypes.JSON_DTYPE:
261287
dataframe.index = dataframe.index.astype(dtypes.STRING_DTYPE)
262288
updated_columns.append(dataframe.index.name)
289+
elif isinstance(
290+
dataframe.index.dtype, pd.ArrowDtype
291+
) and _search_for_nested_json_type(dataframe.index.dtype.pyarrow_dtype):
292+
raise NotImplementedError(
293+
f"Nested JSON types, found in the index: `{dataframe.index.dtype}`, "
294+
f"are currently unsupported for upload. {constants.FEEDBACK_LINK}"
295+
)
263296

264297
return updated_columns

bigframes/dataframe.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3760,10 +3760,9 @@ def to_gbq(
37603760
)
37613761
if_exists = "replace"
37623762

3763-
temp_table_ref = self._session._temp_storage_manager._random_table(
3764-
# The client code owns this table reference now, so skip_cleanup=True
3765-
# to not clean it up when we close the session.
3766-
skip_cleanup=True,
3763+
# The client code owns this table reference now
3764+
temp_table_ref = (
3765+
self._session._temp_storage_manager.generate_unique_resource_id()
37673766
)
37683767
destination_table = f"{temp_table_ref.project}.{temp_table_ref.dataset_id}.{temp_table_ref.table_id}"
37693768

bigframes/geopandas/geoseries.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def area(self, crs=None) -> bigframes.series.Series: # type: ignore
6262
6363
Raises:
6464
NotImplementedError:
65-
GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), insetead.
65+
GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead.
6666
"""
6767
raise NotImplementedError(
6868
f"GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. {constants.FEEDBACK_LINK}"
@@ -93,3 +93,6 @@ def to_wkt(self: GeoSeries) -> bigframes.series.Series:
9393
series = self._apply_unary_op(ops.geo_st_astext_op)
9494
series.name = None
9595
return series
96+
97+
def difference(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore
98+
return self._apply_binary_op(other, ops.geo_st_difference_op)

bigframes/operations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
geo_area_op,
9191
geo_st_astext_op,
9292
geo_st_boundary_op,
93+
geo_st_difference_op,
9394
geo_st_geogfromtext_op,
9495
geo_st_geogpoint_op,
9596
geo_x_op,
@@ -366,6 +367,7 @@
366367
# Geo ops
367368
"geo_area_op",
368369
"geo_st_boundary_op",
370+
"geo_st_difference_op",
369371
"geo_st_astext_op",
370372
"geo_st_geogfromtext_op",
371373
"geo_st_geogpoint_op",

bigframes/operations/blob.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -560,9 +560,9 @@ def pdf_extract(
560560
self,
561561
*,
562562
connection: Optional[str] = None,
563-
max_batching_rows: int = 8192,
564-
container_cpu: Union[float, int] = 0.33,
565-
container_memory: str = "512Mi",
563+
max_batching_rows: int = 1,
564+
container_cpu: Union[float, int] = 2,
565+
container_memory: str = "1Gi",
566566
) -> bigframes.series.Series:
567567
"""Extracts text from PDF URLs and saves the text as string.
568568
@@ -574,10 +574,10 @@ def pdf_extract(
574574
connection (str or None, default None): BQ connection used for
575575
function internet transactions, and the output blob if "dst"
576576
is str. If None, uses default connection of the session.
577-
max_batching_rows (int, default 8,192): Max number of rows per batch
577+
max_batching_rows (int, default 1): Max number of rows per batch
578578
sent to Cloud Run to execute the function.
579-
container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers.
580-
container_memory (str, default "512Mi"): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
579+
container_cpu (int or float, default 2): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
580+
container_memory (str, default "1Gi"): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
581581
582582
Returns:
583583
bigframes.series.Series: contains all text from a pdf file
@@ -604,11 +604,11 @@ def pdf_chunk(
604604
self,
605605
*,
606606
connection: Optional[str] = None,
607-
chunk_size: int = 1000,
607+
chunk_size: int = 2000,
608608
overlap_size: int = 200,
609-
max_batching_rows: int = 8192,
610-
container_cpu: Union[float, int] = 0.33,
611-
container_memory: str = "512Mi",
609+
max_batching_rows: int = 1,
610+
container_cpu: Union[float, int] = 2,
611+
container_memory: str = "1Gi",
612612
) -> bigframes.series.Series:
613613
"""Extracts and chunks text from PDF URLs and saves the text as
614614
arrays of strings.
@@ -620,15 +620,15 @@ def pdf_chunk(
620620
connection (str or None, default None): BQ connection used for
621621
function internet transactions, and the output blob if "dst"
622622
is str. If None, uses default connection of the session.
623-
chunk_size (int, default 1000): the desired size of each text chunk
623+
chunk_size (int, default 2000): the desired size of each text chunk
624624
(number of characters).
625625
overlap_size (int, default 200): the number of overlapping characters
626626
between consective chunks. The helps to ensure context is
627627
preserved across chunk boundaries.
628-
max_batching_rows (int, default 8,192): Max number of rows per batch
628+
max_batching_rows (int, default 1): Max number of rows per batch
629629
sent to Cloud Run to execute the function.
630-
container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers.
631-
container_memory (str, default "512Mi"): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
630+
container_cpu (int or float, default 2): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
631+
container_memory (str, default "1Gi"): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
632632
633633
Returns:
634634
bigframes.series.Series: Series of array[str], where each string is a

0 commit comments

Comments
 (0)