Skip to content

Add pyarrow-based pandas ExtensionArray geometry classes: #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Nov 12, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
aa60669
Add pyarrow-based pandas ExtensionArray geometry classes:
jonmmease Nov 4, 2019
0c5ed0d
Add MIT License header
jonmmease Nov 4, 2019
baf2870
make_array_nonempty must return length 2 array of type
jonmmease Nov 5, 2019
eb5cbf0
Add orient polygon import arg and docstrings
jonmmease Nov 5, 2019
6baa9af
Update spatialpandas/geometry/base.py
jonmmease Nov 12, 2019
eb0ef32
Update spatialpandas/geometry/base.py
jonmmease Nov 12, 2019
0a905ea
Remove unsupported deep keyword from copy
jonmmease Nov 12, 2019
853102a
Update spatialpandas/geometry/multipolygon2d.py
jonmmease Nov 12, 2019
49230d5
Update spatialpandas/geometry/multipolygon2d.py
jonmmease Nov 12, 2019
62dadfe
Update spatialpandas/geometry/polygon2d.py
jonmmease Nov 12, 2019
d89f9ec
Update spatialpandas/geometry/polygon2d.py
jonmmease Nov 12, 2019
4bf5a26
Note that there is a performance cost associated with orient=True
jonmmease Nov 12, 2019
c84adcf
Add docstring explaining make_array_nonempty
jonmmease Nov 12, 2019
a832b55
Use public pandas imports
jonmmease Nov 12, 2019
19b4a6e
Don't register base Geometry type with pandas
jonmmease Nov 12, 2019
f4db853
Include class type in hash methods
jonmmease Nov 12, 2019
34dfd90
More reliable calculation of numpy dtype
jonmmease Nov 12, 2019
29dc141
Use Line2d rather than base Geometry for extension array tests
jonmmease Nov 12, 2019
beb1f3c
Remove explicit "2d" from geometry class names
jonmmease Nov 12, 2019
080d7a2
honor the copy arg to astype when type is unchanged
jonmmease Nov 12, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
This work includes components that were originally developed in the fletcher library by
Uwe L. Korn which is distributed under the following licence:

MIT License

Copyright (c) 2018 Uwe L. Korn

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

setup(name='spatialpandas',
packages=find_packages(exclude=('tests',)),
install_requires=['pandas', 'dask', 'numba', 'numpy'],
install_requires=['pandas', 'dask', 'numba', 'numpy', 'pyarrow>=0.15'],
tests_require=['pytest', 'hypothesis'])
2 changes: 2 additions & 0 deletions spatialpandas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from . import geometry
from . import spatialindex
7 changes: 7 additions & 0 deletions spatialpandas/geometry/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from .polygon2d import Polygon2d, Polygon2dArray, Polygon2dDtype
from .multipolygon2d import MultiPolygon2d, MultiPolygon2dArray, MultiPolygon2dDtype
from .line2d import Line2d, Line2dArray, Line2dDtype
from .multiline2d import MultiLine2d, MultiLine2dArray, MultiLine2dDtype
from .multipoint2d import MultiPoint2d, MultiPoint2dArray, MultiPoint2dDtype
from .ring2d import Ring2d, Ring2dArray, Ring2dDtype
from .base import Geometry, GeometryArray, GeometryDtype
243 changes: 243 additions & 0 deletions spatialpandas/geometry/_algorithms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
from math import sqrt

import numpy as np
from numba import jit, prange

from spatialpandas.utils import ngjit


@ngjit
def bounds_interleaved(values):
"""
compute bounds
"""
xmin = np.inf
ymin = np.inf
xmax = -np.inf
ymax = -np.inf

for i in range(0, len(values), 2):
x = values[i]
if np.isfinite(x):
xmin = min(xmin, x)
xmax = max(xmax, x)

y = values[i + 1]
if np.isfinite(y):
ymin = min(ymin, y)
ymax = max(ymax, y)

return (xmin, ymin, xmax, ymax)


@ngjit
def bounds_interleaved_1d(values, offset):
"""
compute bounds
"""
vmin = np.inf
vmax = -np.inf

for i in range(0, len(values), 2):
v = values[i + offset]
if np.isfinite(v):
vmin = min(vmin, v)
vmax = max(vmax, v)

return (vmin, vmax)


@ngjit
def compute_line_length(values, value_offsets):
total_len = 0.0
for offset_ind in range(len(value_offsets) - 1):
start = value_offsets[offset_ind]
stop = value_offsets[offset_ind + 1]
x0 = values[start]
y0 = values[start + 1]

for i in range(start + 2, stop, 2):
x1 = values[i]
y1 = values[i + 1]

if (np.isfinite(x0) and np.isfinite(y0) and
np.isfinite(x1) and np.isfinite(y1)):
total_len += sqrt((x1 - x0) ** 2 + (y1 - y0) ** 2)

x0 = x1
y0 = y1

return total_len


@ngjit
def compute_area(values, value_offsets):
area = 0.0

for offset_ind in range(len(value_offsets) - 1):
start = value_offsets[offset_ind]
stop = value_offsets[offset_ind + 1]
poly_length = stop - start

if poly_length < 6:
# A degenerate polygon, zero area
continue

for k in range(start, stop - 4, 2):
i, j = k + 2, k + 4
ix = values[i]
jy = values[j + 1]
ky = values[k + 1]

area += ix * (jy - ky)

# wrap-around term for polygon
firstx = values[start]
secondy = values[start + 3]
lasty = values[stop - 3]
area += firstx * (secondy - lasty)

return area / 2.0


@jit(nogil=True, nopython=True, parallel=True)
def geometry_map_nested1(
fn, result, result_offset, values, value_offsets, missing
):
assert len(value_offsets) == 1
value_offsets0 = value_offsets[0]
n = len(value_offsets0) - 1
for i in prange(n):
if not missing[i]:
result[i + result_offset] = fn(values, value_offsets0[i:i + 2])


@jit(nogil=True, nopython=True, parallel=True)
def geometry_map_nested2(
fn, result, result_offset, values, value_offsets, missing
):
assert len(value_offsets) == 2
value_offsets0 = value_offsets[0]
value_offsets1 = value_offsets[1]
n = len(value_offsets0) - 1
for i in prange(n):
if not missing[i]:
start = value_offsets0[i]
stop = value_offsets0[i + 1]
result[i + result_offset] = fn(values, value_offsets1[start:stop + 1])


@jit(nogil=True, nopython=True, parallel=True)
def geometry_map_nested3(
fn, result, result_offset, values, value_offsets, missing
):
assert len(value_offsets) == 3
value_offsets0 = value_offsets[0]
value_offsets1 = value_offsets[1]
value_offsets2 = value_offsets[2]
n = len(value_offsets0) - 1
for i in prange(n):
if not missing[i]:
start = value_offsets1[value_offsets0[i]]
stop = value_offsets1[value_offsets0[i + 1]]
result[i + result_offset] = fn(values, value_offsets2[start:stop + 1])


@jit(nopython=True, nogil=True)
def _lexographic_lt0(a1, a2):
"""
Compare two 1D numpy arrays lexographically
Parameters
----------
a1: ndarray
1D numpy array
a2: ndarray
1D numpy array

Returns
-------
comparison:
True if a1 < a2, False otherwise
"""
for e1, e2 in zip(a1, a2):
if e1 < e2:
return True
elif e1 > e2:
return False
return len(a1) < len(a2)


def _lexographic_lt(a1, a2):
if a1.dtype != np.object and a1.dtype != np.object:
# a1 and a2 primitive
return _lexographic_lt0(a1, a2)
elif a1.dtype == np.object and a1.dtype == np.object:
# a1 and a2 object, process recursively
for e1, e2 in zip(a1, a2):
if _lexographic_lt(e1, e2):
return True
elif _lexographic_lt(e2, e1):
return False
return len(a1) < len(a2)
elif a1.dtype != np.object:
# a2 is object array, a1 primitive
return True
else:
# a1 is object array, a2 primitive
return False


@ngjit
def _extract_isnull_bytemap(bitmap, bitmap_length, bitmap_offset, dst_offset, dst):
"""
Note: Copied from fletcher: See NOTICE for license info

(internal) write the values of a valid bitmap as bytes to a pre-allocatored
isnull bytemap.

Parameters
----------
bitmap: pyarrow.Buffer
bitmap where a set bit indicates that a value is valid
bitmap_length: int
Number of bits to read from the bitmap
bitmap_offset: int
Number of bits to skip from the beginning of the bitmap.
dst_offset: int
Number of bytes to skip from the beginning of the output
dst: numpy.array(dtype=bool)
Pre-allocated numpy array where a byte is set when a value is null
"""
for i in range(bitmap_length):
idx = bitmap_offset + i
byte_idx = idx // 8
bit_mask = 1 << (idx % 8)
dst[dst_offset + i] = (bitmap[byte_idx] & bit_mask) == 0


def extract_isnull_bytemap(list_array):
"""
Note: Copied from fletcher: See NOTICE for license info

Extract the valid bitmaps of a chunked array into numpy isnull bytemaps.

Parameters
----------
chunked_array: pyarrow.ChunkedArray

Returns
-------
valid_bytemap: numpy.array
"""
result = np.zeros(len(list_array), dtype=bool)

offset = 0
chunk = list_array
valid_bitmap = chunk.buffers()[0]
if valid_bitmap:
buf = memoryview(valid_bitmap)
_extract_isnull_bytemap(buf, len(chunk), chunk.offset, offset, result)
else:
return np.full(len(list_array), False)

return result
Loading