Skip to content

feat: add GeoSeries.difference() and bigframes.bigquery.st_difference() #1471

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Mar 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
unix_millis,
unix_seconds,
)
from bigframes.bigquery._operations.geo import st_area
from bigframes.bigquery._operations.geo import st_area, st_difference
from bigframes.bigquery._operations.json import (
json_extract,
json_extract_array,
Expand All @@ -48,6 +48,7 @@
"array_to_string",
# geo ops
"st_area",
"st_difference",
# json ops
"json_set",
"json_extract",
Expand Down
120 changes: 120 additions & 0 deletions bigframes/bigquery/_operations/geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from __future__ import annotations

from bigframes import operations as ops
import bigframes.dtypes
import bigframes.geopandas
import bigframes.series

Expand Down Expand Up @@ -91,3 +92,122 @@ def st_area(series: bigframes.series.Series) -> bigframes.series.Series:
series = series._apply_unary_op(ops.geo_area_op)
series.name = None
return series


def st_difference(
    series: bigframes.series.Series, other: bigframes.series.Series
) -> bigframes.series.Series:
    """
    Returns a GEOGRAPHY that represents the point set difference of
    `series` and `other`. Therefore, the result consists of the part
    of each geography in `series` that doesn't intersect with the aligned
    geography in `other`.

    If a geography in `series` is completely contained in its counterpart in
    `other`, then ST_DIFFERENCE returns an empty GEOGRAPHY.

    .. note::
        BigQuery's Geography functions, like `st_difference`, interpret the geometry
        data type as a point set on the Earth's surface. A point set is a set
        of points, lines, and polygons on the WGS84 reference spheroid, with
        geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data

    **Examples:**

        >>> import bigframes as bpd
        >>> import bigframes.bigquery as bbq
        >>> import bigframes.geopandas
        >>> from shapely.geometry import Polygon, LineString, Point
        >>> bpd.options.display.progress_bar = None

    We can check two GeoSeries against each other, row by row.

        >>> s1 = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Polygon([(0, 0), (2, 2), (0, 2)]),
        ...         Polygon([(0, 0), (2, 2), (0, 2)]),
        ...         LineString([(0, 0), (2, 2)]),
        ...         LineString([(2, 0), (0, 2)]),
        ...         Point(0, 1),
        ...     ],
        ... )
        >>> s2 = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Polygon([(0, 0), (1, 1), (0, 1)]),
        ...         LineString([(1, 0), (1, 3)]),
        ...         LineString([(2, 0), (0, 2)]),
        ...         Point(1, 1),
        ...         Point(0, 1),
        ...     ],
        ...     index=range(1, 6),
        ... )

        >>> s1
        0 POLYGON ((0 0, 2 2, 0 2, 0 0))
        1 POLYGON ((0 0, 2 2, 0 2, 0 0))
        2 LINESTRING (0 0, 2 2)
        3 LINESTRING (2 0, 0 2)
        4 POINT (0 1)
        dtype: geometry

        >>> s2
        1 POLYGON ((0 0, 1 1, 0 1, 0 0))
        2 LINESTRING (1 0, 1 3)
        3 LINESTRING (2 0, 0 2)
        4 POINT (1 1)
        5 POINT (0 1)
        dtype: geometry

        >>> bbq.st_difference(s1, s2)
        0 None
        1 POLYGON ((0.99954 1, 2 2, 0 2, 0 1, 0.99954 1))
        2 LINESTRING (0 0, 1 1.00046, 2 2)
        3 GEOMETRYCOLLECTION EMPTY
        4 POINT (0 1)
        5 None
        dtype: geometry

    We can also take the difference of GeoSeries that each hold a single geometry:

        >>> sbq1 = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])
        ...     ]
        ... )
        >>> sbq2 = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Polygon([(4, 2), (6, 2), (8, 6), (4, 2)])
        ...     ]
        ... )

        >>> sbq1
        0 POLYGON ((0 0, 10 0, 10 10, 0 0))
        dtype: geometry

        >>> sbq2
        0 POLYGON ((4 2, 6 2, 8 6, 4 2))
        dtype: geometry

        >>> bbq.st_difference(sbq1, sbq2)
        0 POLYGON ((0 0, 10 0, 10 10, 0 0), (8 6, 6 2, 4...
        dtype: geometry

    Additionally, we can take the difference of a GeoSeries against a one-row
    GeoSeries. As the output shows, rows are paired by index label, so rows
    without a matching label in the other series come back as ``None``:

        >>> bbq.st_difference(s1, sbq2)
        0 POLYGON ((0 0, 2 2, 0 2, 0 0))
        1 None
        2 None
        3 None
        4 None
        dtype: geometry

    Args:
        series (bigframes.series.Series):
            The GeoSeries whose geographies the difference is taken from.
        other (bigframes.series.Series or geometric object):
            The GeoSeries (elementwise) or geometric object to find the difference to.

    Returns:
        bigframes.series.Series:
            A GeoSeries of the points in each aligned geometry that are not
            in other.
    """
    return series._apply_binary_op(other, ops.geo_st_difference_op)
17 changes: 12 additions & 5 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1001,11 +1001,6 @@ def normalize_op_impl(x: ibis_types.Value):


# Geo Ops
@scalar_op_compiler.register_unary_op(ops.geo_st_boundary_op, pass_op=False)
def geo_st_boundary_op_impl(x: ibis_types.Value):
    """Compile ``geo_st_boundary_op`` by delegating to ``st_boundary``."""
    boundary = st_boundary(x)
    return boundary


@scalar_op_compiler.register_unary_op(ops.geo_area_op)
def geo_area_op_impl(x: ibis_types.Value):
    """Compile ``geo_area_op`` by calling ``area()`` on the operand."""
    # The cast only informs the type checker that ``x`` is geospatial.
    geo_value = typing.cast(ibis_types.GeoSpatialValue, x)
    return geo_value.area()
Expand All @@ -1016,6 +1011,18 @@ def geo_st_astext_op_impl(x: ibis_types.Value):
return typing.cast(ibis_types.GeoSpatialValue, x).as_text()


@scalar_op_compiler.register_unary_op(ops.geo_st_boundary_op, pass_op=False)
def geo_st_boundary_op_impl(x: ibis_types.Value):
    """Compile ``geo_st_boundary_op`` by delegating to ``st_boundary``."""
    boundary = st_boundary(x)
    return boundary


@scalar_op_compiler.register_binary_op(ops.geo_st_difference_op, pass_op=False)
def geo_st_difference_op_impl(x: ibis_types.Value, y: ibis_types.Value):
    """Compile ``geo_st_difference_op`` as ``x.difference(y)``.

    Both operands are cast to ``GeoSpatialValue`` for the type checker before
    the ``difference`` call; ``x`` is the receiver and ``y`` the argument.
    """
    left = typing.cast(ibis_types.GeoSpatialValue, x)
    right = typing.cast(ibis_types.GeoSpatialValue, y)
    return left.difference(right)


@scalar_op_compiler.register_unary_op(ops.geo_st_geogfromtext_op)
def geo_st_geogfromtext_op_impl(x: ibis_types.Value):
# Ibis doesn't seem to provide a dedicated method to cast from string to geography,
Expand Down
5 changes: 4 additions & 1 deletion bigframes/geopandas/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def area(self, crs=None) -> bigframes.series.Series: # type: ignore

Raises:
NotImplementedError:
GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), insetead.
GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead.
"""
raise NotImplementedError(
f"GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. {constants.FEEDBACK_LINK}"
Expand Down Expand Up @@ -93,3 +93,6 @@ def to_wkt(self: GeoSeries) -> bigframes.series.Series:
series = self._apply_unary_op(ops.geo_st_astext_op)
series.name = None
return series

def difference(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series:  # type: ignore
    """Return the result of applying ``ops.geo_st_difference_op`` to this
    series and ``other`` through ``_apply_binary_op``.

    Args:
        other (GeoSeries):
            The series passed as the right-hand operand of the operation.

    Returns:
        bigframes.series.Series:
            Whatever ``_apply_binary_op`` produces for the difference op.
    """
    result = self._apply_binary_op(other, ops.geo_st_difference_op)
    return result
2 changes: 2 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
geo_area_op,
geo_st_astext_op,
geo_st_boundary_op,
geo_st_difference_op,
geo_st_geogfromtext_op,
geo_st_geogpoint_op,
geo_x_op,
Expand Down Expand Up @@ -366,6 +367,7 @@
# Geo ops
"geo_area_op",
"geo_st_boundary_op",
"geo_st_difference_op",
"geo_st_astext_op",
"geo_st_geogfromtext_op",
"geo_st_geogpoint_op",
Expand Down
5 changes: 4 additions & 1 deletion bigframes/operations/geo_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,17 @@
),
)

# Binary op named "geo_st_difference"; operand/result typing is delegated to
# op_typing.BinaryGeo.
geo_st_difference_op = base_ops.create_binary_op(
    name="geo_st_difference",
    type_signature=op_typing.BinaryGeo(),
)

# Unary op named "geo_st_geogfromtext"; its signature is a FixedOutputType
# built from the is_string_like predicate and GEO_DTYPE.
geo_st_geogfromtext_op = base_ops.create_unary_op(
    name="geo_st_geogfromtext",
    type_signature=op_typing.FixedOutputType(
        dtypes.is_string_like,
        dtypes.GEO_DTYPE,
        description="string-like",
    ),
)


# Binary op named "geo_st_geogpoint"; typing is delegated to
# op_typing.BinaryNumericGeo.
geo_st_geogpoint_op = base_ops.create_binary_op(
    name="geo_st_geogpoint",
    type_signature=op_typing.BinaryNumericGeo(),
)
Expand Down
14 changes: 14 additions & 0 deletions bigframes/operations/type.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,20 @@ def output_type(


@dataclasses.dataclass
class BinaryGeo(BinaryTypeSignature):
    """Type signature for geo functions like difference that can map geo to geo."""

    def output_type(
        self, left_type: ExpressionType, right_type: ExpressionType
    ) -> ExpressionType:
        """Validate both operand types and return the geo dtype.

        Args:
            left_type: Type of the left operand. ``None`` is accepted without
                further checking.
            right_type: Type of the right operand. ``None`` is accepted without
                further checking.

        Returns:
            ``bigframes.dtypes.GEO_DTYPE``.

        Raises:
            TypeError: If either operand type is not ``None`` and fails
                ``bigframes.dtypes.is_geo_like``.
        """
        # The left operand is checked first so it is the one reported when
        # both operands are invalid. The message now says "geo" for both
        # operands; the right-hand one previously said "numeric".
        for operand_type in (left_type, right_type):
            if (operand_type is not None) and not bigframes.dtypes.is_geo_like(
                operand_type
            ):
                raise TypeError(f"Type {operand_type} is not geo")
        return bigframes.dtypes.GEO_DTYPE


class BinaryNumericGeo(BinaryTypeSignature):
"""Type signature for geo functions like from_xy that can map ints to ints."""

Expand Down
Loading