Skip to content

Commit c3beb50

Browse files
dcherian and paraseba authored
Stateful tests for array/group manipulation (#2189)
* Stateful tests for array/group manipulation

  Port over some stateful tests from [Arraylake](https://docs.earthmover.io/).

  Co-authored-by: Sebastián Galkin <[email protected]>

* add TODOs

---------

Co-authored-by: Sebastián Galkin <[email protected]>
1 parent a2abb24 commit c3beb50

File tree

1 file changed

+223
-0
lines changed

1 file changed

+223
-0
lines changed
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
import hypothesis.extra.numpy as npst
2+
import hypothesis.strategies as st
3+
import pytest
4+
from hypothesis import assume, note
5+
from hypothesis.stateful import (
6+
RuleBasedStateMachine,
7+
Settings,
8+
initialize,
9+
invariant,
10+
precondition,
11+
rule,
12+
run_state_machine_as_test,
13+
)
14+
15+
import zarr
16+
from zarr import Array
17+
from zarr.abc.store import Store
18+
from zarr.core.sync import SyncMixin
19+
from zarr.storage import MemoryStore, ZipStore
20+
from zarr.testing.strategies import node_names, np_array_and_chunks, numpy_arrays
21+
22+
23+
def split_prefix_name(path):
    """
    Split a node path into ``(prefix, name)`` at its last "/".

    ``prefix`` is everything before the last "/" and ``name`` is everything
    after it. A path that contains no "/" is returned whole as ``name`` with
    an empty ``prefix``, e.g. ``"a/b/c" -> ("a/b", "c")`` and
    ``"c" -> ("", "c")``.
    """
    # rpartition returns ("", "", path) when there is no separator, so the
    # no-slash case needs no special handling.
    parent, _sep, leaf = path.rpartition("/")
    return parent, leaf
31+
32+
33+
class ZarrHierarchyStateMachine(SyncMixin, RuleBasedStateMachine):
    """
    This state machine models operations that modify a zarr store's
    hierarchy. That is, user actions that modify arrays/groups as well
    as list operations. It is intended to be used by external stores, and
    compares their results to a MemoryStore that is assumed to be perfect.

    Every rule applies the same operation to the store under test
    (``self.store``) and to the reference ``MemoryStore`` (``self.model``).
    After each step the invariant checks that both stores list the same keys.
    """

    def __init__(self, store: Store) -> None:
        """
        Create the state machine for ``store``.

        Parameters
        ----------
        store
            The store under test. It is cleared in ``init_store`` before any
            rule runs.
        """
        super().__init__()

        self.store = store

        # Reference implementation; a root group is created in it right away.
        self.model = MemoryStore(mode="w")
        zarr.group(store=self.model)

        # Track state of the hierarchy, these should contain fully qualified paths
        self.all_groups: set[str] = set()
        self.all_arrays: set[str] = set()

    @initialize()
    def init_store(self):
        """Clear the store under test and create a root group in it."""
        # This lets us reuse the fixture provided store.
        self._sync(self.store.clear())
        zarr.group(store=self.store)

    def can_add(self, path):
        """Return True if ``path`` is not already tracked as a group or an array."""
        return path not in self.all_groups and path not in self.all_arrays

    # -------------------- store operations -----------------------
    @rule(name=node_names, data=st.data())
    def add_group(self, name, data):
        """
        Create a group called ``name`` in both stores.

        The parent is drawn from the tracked groups; when no group is tracked
        yet the new group is created directly under the root.
        """
        if self.all_groups:
            parent = data.draw(st.sampled_from(sorted(self.all_groups)), label="Group parent")
        else:
            parent = ""
        path = f"{parent}/{name}".lstrip("/")
        assume(self.can_add(path))
        note(f"Adding group: path='{path}'")
        for store in [self.store, self.model]:
            zarr.group(store=store, path=path)
        # Record the path only once both stores hold the group, matching add_array.
        self.all_groups.add(path)

    @rule(
        data=st.data(),
        name=node_names,
        array_and_chunks=np_array_and_chunks(arrays=numpy_arrays(zarr_formats=st.just(3))),
    )
    def add_array(self, data, name, array_and_chunks):
        """
        Create an array called ``name`` in both stores.

        The array contents and chunking come from ``array_and_chunks``; the
        fill value is drawn from the array's dtype. The parent is drawn from
        the tracked groups, or is the root when no group is tracked yet.
        """
        array, chunks = array_and_chunks
        fill_value = data.draw(npst.from_dtype(array.dtype))
        if self.all_groups:
            parent = data.draw(st.sampled_from(sorted(self.all_groups)), label="Array parent")
        else:
            parent = ""
        # TODO: support creating deeper paths
        # TODO: support overwriting potentially by just skipping `self.can_add`
        path = f"{parent}/{name}".lstrip("/")
        assume(self.can_add(path))
        note(f"Adding array: path='{path}' shape={array.shape} chunks={chunks}")
        for store in [self.store, self.model]:
            zarr.array(array, chunks=chunks, path=path, store=store, fill_value=fill_value)
        self.all_arrays.add(path)

    # NOTE(review): the disabled rules below still reference names that this
    # class does not define (``self.repo``, ``self.model.rename``,
    # ``self.model.can_add``); presumably they are left over from the original
    # implementation and need porting before they can be enabled.
    #
    # @precondition(lambda self: bool(self.all_groups))
    # @precondition(lambda self: bool(self.all_arrays))
    # @rule(data=st.data())
    # def move_array(self, data):
    #     array_path = data.draw(st.sampled_from(self.all_arrays), label="Array move source")
    #     to_group = data.draw(st.sampled_from(self.all_groups), label="Array move destination")

    #     # fixme renaming to self?
    #     array_name = os.path.basename(array_path)
    #     assume(self.model.can_add(to_group, array_name))
    #     new_path = f"{to_group}/{array_name}".lstrip("/")
    #     note(f"moving array '{array_path}' -> '{new_path}'")
    #     self.model.rename(array_path, new_path)
    #     self.repo.store.rename(array_path, new_path)

    # @precondition(lambda self: len(self.all_groups) >= 2)
    # @rule(data=st.data())
    # def move_group(self, data):
    #     from_group = data.draw(st.sampled_from(self.all_groups), label="Group move source")
    #     to_group = data.draw(st.sampled_from(self.all_groups), label="Group move destination")
    #     assume(not to_group.startswith(from_group))

    #     from_group_name = os.path.basename(from_group)
    #     assume(self.model.can_add(to_group, from_group_name))
    #     # fixme renaming to self?
    #     new_path = f"{to_group}/{from_group_name}".lstrip("/")
    #     note(f"moving group '{from_group}' -> '{new_path}'")
    #     self.model.rename(from_group, new_path)
    #     self.repo.store.rename(from_group, new_path)

    @precondition(lambda self: len(self.all_arrays) >= 1)
    @rule(data=st.data())
    def delete_array_using_del(self, data):
        """Delete one tracked array from both stores via ``del group[name]``."""
        array_path = data.draw(
            st.sampled_from(sorted(self.all_arrays)), label="Array deletion target"
        )
        prefix, array_name = split_prefix_name(array_path)
        note(f"Deleting array '{array_path}' ({prefix=!r}, {array_name=!r}) using del")
        for store in [self.model, self.store]:
            group = zarr.open_group(path=prefix, store=store)
            group[array_name]  # check that it exists
            del group[array_name]
        self.all_arrays.remove(array_path)

    @precondition(lambda self: len(self.all_groups) >= 2)  # fixme don't delete root
    @rule(data=st.data())
    def delete_group_using_del(self, data):
        """
        Delete one tracked group from both stores via ``del group[name]``.

        Every array and subgroup below the deleted group (as listed by the
        model) is dropped from the tracking sets as well.
        """
        group_path = data.draw(
            st.sampled_from(sorted(self.all_groups)), label="Group deletion target"
        )
        prefix, group_name = split_prefix_name(group_path)
        note(f"Deleting group '{group_path}' ({prefix=!r}, {group_name=!r}) using del")
        # Collect descendants from the model before anything is deleted so the
        # tracking sets can be kept in step with the hierarchy.
        members = zarr.open_group(store=self.model, path=group_path).members(max_depth=None)
        for _, obj in members:
            if isinstance(obj, Array):
                self.all_arrays.remove(obj.path)
            else:
                self.all_groups.remove(obj.path)
        for store in [self.store, self.model]:
            group = zarr.open_group(store=store, path=prefix)
            group[group_name]  # check that it exists
            del group[group_name]
        # Tracked paths are built with ``lstrip("/")`` and so never equal "/";
        # the guard is kept as a defensive check.
        if group_path != "/":
            # The root group is always present
            self.all_groups.remove(group_path)

    # # --------------- assertions -----------------
    # def check_group_arrays(self, group):
    #     # note(f"Checking arrays of '{group}'")
    #     g1 = self.model.get_group(group)
    #     g2 = zarr.open_group(path=group, mode="r", store=self.repo.store)
    #     model_arrays = sorted(g1.arrays(), key=itemgetter(0))
    #     our_arrays = sorted(g2.arrays(), key=itemgetter(0))
    #     for (n1, a1), (n2, a2) in zip_longest(model_arrays, our_arrays):
    #         assert n1 == n2
    #         assert_array_equal(a1, a2)

    # def check_subgroups(self, group_path):
    #     g1 = self.model.get_group(group_path)
    #     g2 = zarr.open_group(path=group_path, mode="r", store=self.repo.store)
    #     g1_children = [name for (name, _) in g1.groups()]
    #     g2_children = [name for (name, _) in g2.groups()]
    #     # note(f"Checking {len(g1_children)} subgroups of group '{group_path}'")
    #     assert g1_children == g2_children

    # def check_list_prefix_from_group(self, group):
    #     prefix = f"meta/root/{group}"
    #     model_list = sorted(self.model.list_prefix(prefix))
    #     al_list = sorted(self.repo.store.list_prefix(prefix))
    #     # note(f"Checking {len(model_list)} keys under '{prefix}'")
    #     assert model_list == al_list

    #     prefix = f"data/root/{group}"
    #     model_list = sorted(self.model.list_prefix(prefix))
    #     al_list = sorted(self.repo.store.list_prefix(prefix))
    #     # note(f"Checking {len(model_list)} keys under '{prefix}'")
    #     assert model_list == al_list

    # @precondition(lambda self: self.model.is_persistent_session())
    # @rule(data=st.data())
    # def check_group_path(self, data):
    #     t0 = time.time()
    #     group = data.draw(st.sampled_from(self.all_groups))
    #     self.check_list_prefix_from_group(group)
    #     self.check_subgroups(group)
    #     self.check_group_arrays(group)
    #     t1 = time.time()
    #     note(f"Checks took {t1 - t0} sec.")

    @invariant()
    def check_list_prefix_from_root(self):
        """Assert that both stores list the same keys under the empty prefix."""
        model_list = self._sync_iter(self.model.list_prefix(""))
        store_list = self._sync_iter(self.store.list_prefix(""))
        note(f"Checking {len(model_list)} keys")
        assert sorted(model_list) == sorted(store_list)
212+
213+
214+
def test_zarr_hierarchy(sync_store: Store):
    """
    Run ``ZarrHierarchyStateMachine`` against the fixture-provided store.

    The state machine is only executed for ``MemoryStore``. Every other store
    type is reported as skipped rather than silently passing without running
    any rule.
    """

    def mk_test_instance_sync() -> ZarrHierarchyStateMachine:
        return ZarrHierarchyStateMachine(sync_store)

    if isinstance(sync_store, ZipStore):
        pytest.skip(reason="ZipStore does not support delete")
    if not isinstance(sync_store, MemoryStore):
        # Without this the test would fall through and be reported as passed
        # even though the state machine never ran.
        pytest.skip(reason="Hierarchy state machine is only run against MemoryStore")
    run_state_machine_as_test(
        mk_test_instance_sync, settings=Settings(report_multiple_bugs=False)
    )

0 commit comments

Comments
 (0)