Skip to content

Commit 838b38d

Browse files
authored
Add pyarrow-based pandas ExtensionArray geometry classes: (#2)
* Add pyarrow-based pandas ExtensionArray geometry classes: - MultiPointArray - LineArray - MultiLineArray - RingArray - PolygonArray - MultiPolygonArray
1 parent b63ebe6 commit 838b38d

18 files changed

+1982
-5
lines changed

NOTICE

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
This work includes components that were originally developed in the fletcher library by
2+
Uwe L. Korn which is distributed under the following licence:
3+
4+
MIT License
5+
6+
Copyright (c) 2018 Uwe L. Korn
7+
8+
Permission is hereby granted, free of charge, to any person obtaining a copy
9+
of this software and associated documentation files (the "Software"), to deal
10+
in the Software without restriction, including without limitation the rights
11+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12+
copies of the Software, and to permit persons to whom the Software is
13+
furnished to do so, subject to the following conditions:
14+
15+
The above copyright notice and this permission notice shall be included in all
16+
copies or substantial portions of the Software.
17+
18+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24+
SOFTWARE.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22

33
setup(name='spatialpandas',
44
packages=find_packages(exclude=('tests',)),
5-
install_requires=['pandas', 'dask', 'numba', 'numpy'],
5+
install_requires=['pandas', 'dask', 'numba', 'numpy', 'pyarrow>=0.15'],
66
tests_require=['pytest', 'hypothesis'])

spatialpandas/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from . import geometry
2+
from . import spatialindex

spatialpandas/geometry/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from .polygon import Polygon, PolygonArray, PolygonDtype
2+
from .multipolygon import MultiPolygon, MultiPolygonArray, MultiPolygonDtype
3+
from .line import Line, LineArray, LineDtype
4+
from .multiline import MultiLine, MultiLineArray, MultiLineDtype
5+
from .multipoint import MultiPoint, MultiPointArray, MultiPointDtype
6+
from .ring import Ring, RingArray, RingDtype
7+
from .base import Geometry, GeometryArray, GeometryDtype

spatialpandas/geometry/_algorithms.py

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
from math import sqrt
2+
3+
import numpy as np
4+
from numba import jit, prange
5+
6+
from spatialpandas.utils import ngjit
7+
8+
9+
@ngjit
10+
def bounds_interleaved(values):
11+
"""
12+
compute bounds
13+
"""
14+
xmin = np.inf
15+
ymin = np.inf
16+
xmax = -np.inf
17+
ymax = -np.inf
18+
19+
for i in range(0, len(values), 2):
20+
x = values[i]
21+
if np.isfinite(x):
22+
xmin = min(xmin, x)
23+
xmax = max(xmax, x)
24+
25+
y = values[i + 1]
26+
if np.isfinite(y):
27+
ymin = min(ymin, y)
28+
ymax = max(ymax, y)
29+
30+
return (xmin, ymin, xmax, ymax)
31+
32+
33+
@ngjit
34+
def bounds_interleaved_1d(values, offset):
35+
"""
36+
compute bounds
37+
"""
38+
vmin = np.inf
39+
vmax = -np.inf
40+
41+
for i in range(0, len(values), 2):
42+
v = values[i + offset]
43+
if np.isfinite(v):
44+
vmin = min(vmin, v)
45+
vmax = max(vmax, v)
46+
47+
return (vmin, vmax)
48+
49+
50+
@ngjit
51+
def compute_line_length(values, value_offsets):
52+
total_len = 0.0
53+
for offset_ind in range(len(value_offsets) - 1):
54+
start = value_offsets[offset_ind]
55+
stop = value_offsets[offset_ind + 1]
56+
x0 = values[start]
57+
y0 = values[start + 1]
58+
59+
for i in range(start + 2, stop, 2):
60+
x1 = values[i]
61+
y1 = values[i + 1]
62+
63+
if (np.isfinite(x0) and np.isfinite(y0) and
64+
np.isfinite(x1) and np.isfinite(y1)):
65+
total_len += sqrt((x1 - x0) ** 2 + (y1 - y0) ** 2)
66+
67+
x0 = x1
68+
y0 = y1
69+
70+
return total_len
71+
72+
73+
@ngjit
74+
def compute_area(values, value_offsets):
75+
area = 0.0
76+
77+
for offset_ind in range(len(value_offsets) - 1):
78+
start = value_offsets[offset_ind]
79+
stop = value_offsets[offset_ind + 1]
80+
poly_length = stop - start
81+
82+
if poly_length < 6:
83+
# A degenerate polygon, zero area
84+
continue
85+
86+
for k in range(start, stop - 4, 2):
87+
i, j = k + 2, k + 4
88+
ix = values[i]
89+
jy = values[j + 1]
90+
ky = values[k + 1]
91+
92+
area += ix * (jy - ky)
93+
94+
# wrap-around term for polygon
95+
firstx = values[start]
96+
secondy = values[start + 3]
97+
lasty = values[stop - 3]
98+
area += firstx * (secondy - lasty)
99+
100+
return area / 2.0
101+
102+
103+
@jit(nogil=True, nopython=True, parallel=True)
104+
def geometry_map_nested1(
105+
fn, result, result_offset, values, value_offsets, missing
106+
):
107+
assert len(value_offsets) == 1
108+
value_offsets0 = value_offsets[0]
109+
n = len(value_offsets0) - 1
110+
for i in prange(n):
111+
if not missing[i]:
112+
result[i + result_offset] = fn(values, value_offsets0[i:i + 2])
113+
114+
115+
@jit(nogil=True, nopython=True, parallel=True)
116+
def geometry_map_nested2(
117+
fn, result, result_offset, values, value_offsets, missing
118+
):
119+
assert len(value_offsets) == 2
120+
value_offsets0 = value_offsets[0]
121+
value_offsets1 = value_offsets[1]
122+
n = len(value_offsets0) - 1
123+
for i in prange(n):
124+
if not missing[i]:
125+
start = value_offsets0[i]
126+
stop = value_offsets0[i + 1]
127+
result[i + result_offset] = fn(values, value_offsets1[start:stop + 1])
128+
129+
130+
@jit(nogil=True, nopython=True, parallel=True)
131+
def geometry_map_nested3(
132+
fn, result, result_offset, values, value_offsets, missing
133+
):
134+
assert len(value_offsets) == 3
135+
value_offsets0 = value_offsets[0]
136+
value_offsets1 = value_offsets[1]
137+
value_offsets2 = value_offsets[2]
138+
n = len(value_offsets0) - 1
139+
for i in prange(n):
140+
if not missing[i]:
141+
start = value_offsets1[value_offsets0[i]]
142+
stop = value_offsets1[value_offsets0[i + 1]]
143+
result[i + result_offset] = fn(values, value_offsets2[start:stop + 1])
144+
145+
146+
@jit(nopython=True, nogil=True)
147+
def _lexographic_lt0(a1, a2):
148+
"""
149+
Compare two 1D numpy arrays lexographically
150+
Parameters
151+
----------
152+
a1: ndarray
153+
1D numpy array
154+
a2: ndarray
155+
1D numpy array
156+
157+
Returns
158+
-------
159+
comparison:
160+
True if a1 < a2, False otherwise
161+
"""
162+
for e1, e2 in zip(a1, a2):
163+
if e1 < e2:
164+
return True
165+
elif e1 > e2:
166+
return False
167+
return len(a1) < len(a2)
168+
169+
170+
def _lexographic_lt(a1, a2):
171+
if a1.dtype != np.object and a1.dtype != np.object:
172+
# a1 and a2 primitive
173+
return _lexographic_lt0(a1, a2)
174+
elif a1.dtype == np.object and a1.dtype == np.object:
175+
# a1 and a2 object, process recursively
176+
for e1, e2 in zip(a1, a2):
177+
if _lexographic_lt(e1, e2):
178+
return True
179+
elif _lexographic_lt(e2, e1):
180+
return False
181+
return len(a1) < len(a2)
182+
elif a1.dtype != np.object:
183+
# a2 is object array, a1 primitive
184+
return True
185+
else:
186+
# a1 is object array, a2 primitive
187+
return False
188+
189+
190+
@ngjit
191+
def _extract_isnull_bytemap(bitmap, bitmap_length, bitmap_offset, dst_offset, dst):
192+
"""
193+
Note: Copied from fletcher: See NOTICE for license info
194+
195+
(internal) write the values of a valid bitmap as bytes to a pre-allocatored
196+
isnull bytemap.
197+
198+
Parameters
199+
----------
200+
bitmap: pyarrow.Buffer
201+
bitmap where a set bit indicates that a value is valid
202+
bitmap_length: int
203+
Number of bits to read from the bitmap
204+
bitmap_offset: int
205+
Number of bits to skip from the beginning of the bitmap.
206+
dst_offset: int
207+
Number of bytes to skip from the beginning of the output
208+
dst: numpy.array(dtype=bool)
209+
Pre-allocated numpy array where a byte is set when a value is null
210+
"""
211+
for i in range(bitmap_length):
212+
idx = bitmap_offset + i
213+
byte_idx = idx // 8
214+
bit_mask = 1 << (idx % 8)
215+
dst[dst_offset + i] = (bitmap[byte_idx] & bit_mask) == 0
216+
217+
218+
def extract_isnull_bytemap(list_array):
219+
"""
220+
Note: Copied from fletcher: See NOTICE for license info
221+
222+
Extract the valid bitmaps of a chunked array into numpy isnull bytemaps.
223+
224+
Parameters
225+
----------
226+
chunked_array: pyarrow.ChunkedArray
227+
228+
Returns
229+
-------
230+
valid_bytemap: numpy.array
231+
"""
232+
result = np.zeros(len(list_array), dtype=bool)
233+
234+
offset = 0
235+
chunk = list_array
236+
valid_bitmap = chunk.buffers()[0]
237+
if valid_bitmap:
238+
buf = memoryview(valid_bitmap)
239+
_extract_isnull_bytemap(buf, len(chunk), chunk.offset, offset, result)
240+
else:
241+
return np.full(len(list_array), False)
242+
243+
return result

0 commit comments

Comments
 (0)