Skip to content

Commit a31046c

Browse files
jhammand-v-bpre-commit-ci[bot]
authored
Feature: store learns to delete prefixes when overwriting/creating hierarchy nodes (#2430)
* fix(array): thread order parameter through to array __init__ * type fixes * move defaults * apply MemoryOrder type to ArrayV2Metadata * more more * more more more * feature(store,group,array): stores learn to delete prefixes when overwriting nodes - add Store.delete_dir and Store.delete_prefix - update array and group creation methods to call delete_dir - change list_prefix to return absolue keys * fixup * fixup * respond to review * fixup * fixup * Update src/zarr/abc/store.py * style: pre-commit fixes --------- Co-authored-by: Davis Bennett <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 7bceb58 commit a31046c

File tree

14 files changed

+113
-37
lines changed

14 files changed

+113
-37
lines changed

src/zarr/abc/store.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,8 @@ def list(self) -> AsyncGenerator[str, None]:
342342
@abstractmethod
343343
def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
344344
"""
345-
Retrieve all keys in the store that begin with a given prefix. Keys are returned with the
346-
common leading prefix removed.
345+
Retrieve all keys in the store that begin with a given prefix. Keys are returned relative
346+
to the root of the store.
347347
348348
Parameters
349349
----------
@@ -371,6 +371,20 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
371371
"""
372372
...
373373

374+
async def delete_dir(self, prefix: str) -> None:
375+
"""
376+
Remove all keys and prefixes in the store that begin with a given prefix.
377+
"""
378+
if not self.supports_deletes:
379+
raise NotImplementedError
380+
if not self.supports_listing:
381+
raise NotImplementedError
382+
self._check_writable()
383+
if not prefix.endswith("/"):
384+
prefix += "/"
385+
async for key in self.list_prefix(prefix):
386+
await self.delete(key)
387+
374388
def close(self) -> None:
375389
"""Close the store."""
376390
self._is_open = False

src/zarr/core/array.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,12 @@ async def _create_v3(
553553
attributes: dict[str, JSON] | None = None,
554554
exists_ok: bool = False,
555555
) -> AsyncArray[ArrayV3Metadata]:
556-
if not exists_ok:
556+
if exists_ok:
557+
if store_path.store.supports_deletes:
558+
await store_path.delete_dir()
559+
else:
560+
await ensure_no_existing_node(store_path, zarr_format=3)
561+
else:
557562
await ensure_no_existing_node(store_path, zarr_format=3)
558563

559564
shape = parse_shapelike(shape)
@@ -605,7 +610,12 @@ async def _create_v2(
605610
attributes: dict[str, JSON] | None = None,
606611
exists_ok: bool = False,
607612
) -> AsyncArray[ArrayV2Metadata]:
608-
if not exists_ok:
613+
if exists_ok:
614+
if store_path.store.supports_deletes:
615+
await store_path.delete_dir()
616+
else:
617+
await ensure_no_existing_node(store_path, zarr_format=2)
618+
else:
609619
await ensure_no_existing_node(store_path, zarr_format=2)
610620

611621
if order is None:

src/zarr/core/group.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,13 @@ async def from_store(
404404
zarr_format: ZarrFormat = 3,
405405
) -> AsyncGroup:
406406
store_path = await make_store_path(store)
407-
if not exists_ok:
407+
408+
if exists_ok:
409+
if store_path.store.supports_deletes:
410+
await store_path.delete_dir()
411+
else:
412+
await ensure_no_existing_node(store_path, zarr_format=zarr_format)
413+
else:
408414
await ensure_no_existing_node(store_path, zarr_format=zarr_format)
409415
attributes = attributes or {}
410416
group = cls(
@@ -727,19 +733,8 @@ def _getitem_consolidated(
727733

728734
async def delitem(self, key: str) -> None:
729735
store_path = self.store_path / key
730-
if self.metadata.zarr_format == 3:
731-
await (store_path / ZARR_JSON).delete()
732-
733-
elif self.metadata.zarr_format == 2:
734-
await asyncio.gather(
735-
(store_path / ZGROUP_JSON).delete(), # TODO: missing_ok=False
736-
(store_path / ZARRAY_JSON).delete(), # TODO: missing_ok=False
737-
(store_path / ZATTRS_JSON).delete(), # TODO: missing_ok=True
738-
)
739-
740-
else:
741-
raise ValueError(f"unexpected zarr_format: {self.metadata.zarr_format}")
742736

737+
await store_path.delete_dir()
743738
if self.metadata.consolidated_metadata:
744739
self.metadata.consolidated_metadata.metadata.pop(key, None)
745740
await self._save_metadata()

src/zarr/storage/common.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,15 @@ async def delete(self) -> None:
101101
"""
102102
await self.store.delete(self.path)
103103

104+
async def delete_dir(self) -> None:
105+
"""
106+
Delete all keys with the given prefix from the store.
107+
"""
108+
path = self.path
109+
if not path.endswith("/"):
110+
path += "/"
111+
await self.store.delete_dir(path)
112+
104113
async def set_if_not_exists(self, default: Buffer) -> None:
105114
"""
106115
Store a key to ``value`` if the key is not already present.

src/zarr/storage/local.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,8 @@ async def list(self) -> AsyncGenerator[str, None]:
226226

227227
async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
228228
# docstring inherited
229-
to_strip = (
230-
(self.root / prefix).as_posix() + "/"
231-
) # TODO: fixme in https://github.com/zarr-developers/zarr-python/issues/2438
229+
to_strip = self.root.as_posix() + "/"
230+
prefix = prefix.rstrip("/")
232231
for p in (self.root / prefix).rglob("*"):
233232
if p.is_file():
234233
yield p.as_posix().replace(to_strip, "")

src/zarr/storage/logging.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,11 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
222222
async for key in self._store.list_dir(prefix=prefix):
223223
yield key
224224

225+
async def delete_dir(self, prefix: str) -> None:
226+
# docstring inherited
227+
with self.log(prefix):
228+
await self._store.delete_dir(prefix=prefix)
229+
225230
def with_mode(self, mode: AccessModeLiteral) -> Self:
226231
# docstring inherited
227232
with self.log(mode):

src/zarr/storage/memory.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from logging import getLogger
34
from typing import TYPE_CHECKING, Self
45

56
from zarr.abc.store import ByteRangeRequest, Store
@@ -14,6 +15,9 @@
1415
from zarr.core.common import AccessModeLiteral
1516

1617

18+
logger = getLogger(__name__)
19+
20+
1721
class MemoryStore(Store):
1822
"""
1923
In-memory store for testing purposes.
@@ -137,7 +141,7 @@ async def delete(self, key: str) -> None:
137141
try:
138142
del self._store_dict[key]
139143
except KeyError:
140-
pass
144+
logger.debug("Key %s does not exist.", key)
141145

142146
async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None:
143147
# docstring inherited
@@ -150,9 +154,10 @@ async def list(self) -> AsyncGenerator[str, None]:
150154

151155
async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
152156
# docstring inherited
153-
for key in self._store_dict:
157+
# note: we materialize all dict keys into a list here so we can mutate the dict in-place (e.g. in delete_prefix)
158+
for key in list(self._store_dict):
154159
if key.startswith(prefix):
155-
yield key.removeprefix(prefix)
160+
yield key
156161

157162
async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
158163
# docstring inherited

src/zarr/storage/remote.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
340340

341341
async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
342342
# docstring inherited
343-
find_str = f"{self.path}/{prefix}"
344-
for onefile in await self.fs._find(find_str, detail=False, maxdepth=None, withdirs=False):
345-
yield onefile.removeprefix(find_str)
343+
for onefile in await self.fs._find(
344+
f"{self.path}/{prefix}", detail=False, maxdepth=None, withdirs=False
345+
):
346+
yield onefile.removeprefix(f"{self.path}/")

src/zarr/storage/zip.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
244244
# docstring inherited
245245
async for key in self.list():
246246
if key.startswith(prefix):
247-
yield key.removeprefix(prefix)
247+
yield key
248248

249249
async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
250250
# docstring inherited

src/zarr/testing/store.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,11 +213,26 @@ async def test_exists(self, store: S) -> None:
213213
assert await store.exists("foo/zarr.json")
214214

215215
async def test_delete(self, store: S) -> None:
216+
if not store.supports_deletes:
217+
pytest.skip("store does not support deletes")
216218
await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar"))
217219
assert await store.exists("foo/zarr.json")
218220
await store.delete("foo/zarr.json")
219221
assert not await store.exists("foo/zarr.json")
220222

223+
async def test_delete_dir(self, store: S) -> None:
224+
if not store.supports_deletes:
225+
pytest.skip("store does not support deletes")
226+
await store.set("zarr.json", self.buffer_cls.from_bytes(b"root"))
227+
await store.set("foo-bar/zarr.json", self.buffer_cls.from_bytes(b"root"))
228+
await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar"))
229+
await store.set("foo/c/0", self.buffer_cls.from_bytes(b"chunk"))
230+
await store.delete_dir("foo")
231+
assert await store.exists("zarr.json")
232+
assert await store.exists("foo-bar/zarr.json")
233+
assert not await store.exists("foo/zarr.json")
234+
assert not await store.exists("foo/c/0")
235+
221236
async def test_empty(self, store: S) -> None:
222237
assert await store.empty()
223238
await self.set(
@@ -249,8 +264,7 @@ async def test_list(self, store: S) -> None:
249264
async def test_list_prefix(self, store: S) -> None:
250265
"""
251266
Test that the `list_prefix` method works as intended. Given a prefix, it should return
252-
all the keys in storage that start with this prefix. Keys should be returned with the shared
253-
prefix removed.
267+
all the keys in storage that start with this prefix.
254268
"""
255269
prefixes = ("", "a/", "a/b/", "a/b/c/")
256270
data = self.buffer_cls.from_bytes(b"")
@@ -264,7 +278,7 @@ async def test_list_prefix(self, store: S) -> None:
264278
expected: tuple[str, ...] = ()
265279
for key in store_dict:
266280
if key.startswith(prefix):
267-
expected += (key.removeprefix(prefix),)
281+
expected += (key,)
268282
expected = tuple(sorted(expected))
269283
assert observed == expected
270284

tests/test_array.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ def test_array_creation_existing_node(
5151
new_dtype = "float32"
5252

5353
if exists_ok:
54+
if not store.supports_deletes:
55+
pytest.skip("store does not support deletes")
5456
arr_new = Array.create(
5557
spath / "extant",
5658
shape=new_shape,

tests/test_group.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,10 @@ def test_group_open(store: Store, zarr_format: ZarrFormat, exists_ok: bool) -> N
290290
with pytest.raises(ContainsGroupError):
291291
Group.from_store(store, attributes=attrs, zarr_format=zarr_format, exists_ok=exists_ok)
292292
else:
293+
if not store.supports_deletes:
294+
pytest.skip(
295+
"Store does not support deletes but `exists_ok` is True, requiring deletes to override a group"
296+
)
293297
group_created_again = Group.from_store(
294298
store, attributes=new_attrs, zarr_format=zarr_format, exists_ok=exists_ok
295299
)
@@ -720,6 +724,8 @@ def test_group_creation_existing_node(
720724
new_attributes = {"new": True}
721725

722726
if exists_ok:
727+
if not store.supports_deletes:
728+
pytest.skip("store does not support deletes but exists_ok is True")
723729
node_new = Group.from_store(
724730
spath / "extant",
725731
attributes=new_attributes,
@@ -1092,7 +1098,9 @@ async def test_require_group(store: LocalStore | MemoryStore, zarr_format: ZarrF
10921098
assert foo_group.attrs == {"foo": 100}
10931099

10941100
# test that we can get the group using require_group and overwrite=True
1095-
foo_group = await root.require_group("foo", overwrite=True)
1101+
if store.supports_deletes:
1102+
foo_group = await root.require_group("foo", overwrite=True)
1103+
assert foo_group.attrs == {}
10961104

10971105
_ = await foo_group.create_array(
10981106
"bar", shape=(10,), dtype="uint8", chunk_shape=(2,), attributes={"foo": 100}
@@ -1371,3 +1379,16 @@ def test_group_deprecated_positional_args(method: str) -> None:
13711379
with pytest.warns(FutureWarning, match=r"Pass name=.*, data=.* as keyword args."):
13721380
arr = getattr(root, method)("foo_like", data, **kwargs)
13731381
assert arr.shape == data.shape
1382+
1383+
1384+
@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
1385+
def test_delitem_removes_children(store: Store, zarr_format: ZarrFormat) -> None:
1386+
# https://github.com/zarr-developers/zarr-python/issues/2191
1387+
g1 = zarr.group(store=store, zarr_format=zarr_format)
1388+
g1.create_group("0")
1389+
g1.create_group("0/0")
1390+
arr = g1.create_array("0/0/0", shape=(1,))
1391+
arr[:] = 1
1392+
del g1["0"]
1393+
with pytest.raises(KeyError):
1394+
g1["0/0"]

tests/test_store/test_logging.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,15 @@ async def test_logging_store_counter(store: Store) -> None:
4545
arr[:] = 1
4646

4747
assert wrapped.counter["set"] == 2
48-
assert wrapped.counter["get"] == 0 # 1 if overwrite=False
4948
assert wrapped.counter["list"] == 0
5049
assert wrapped.counter["list_dir"] == 0
5150
assert wrapped.counter["list_prefix"] == 0
51+
if store.supports_deletes:
52+
assert wrapped.counter["get"] == 0 # 1 if overwrite=False
53+
assert wrapped.counter["delete_dir"] == 1
54+
else:
55+
assert wrapped.counter["get"] == 1
56+
assert wrapped.counter["delete_dir"] == 0
5257

5358

5459
async def test_with_mode():

tests/test_store/test_zip.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from zarr.testing.store import StoreTests
1515

1616
if TYPE_CHECKING:
17-
from collections.abc import Coroutine
1817
from typing import Any
1918

2019

@@ -65,9 +64,6 @@ def test_store_supports_partial_writes(self, store: ZipStore) -> None:
6564
def test_store_supports_listing(self, store: ZipStore) -> None:
6665
assert store.supports_listing
6766

68-
def test_delete(self, store: ZipStore) -> Coroutine[Any, Any, None]:
69-
pass
70-
7167
def test_api_integration(self, store: ZipStore) -> None:
7268
root = zarr.open_group(store=store)
7369

@@ -103,4 +99,4 @@ async def test_with_mode(self, store: ZipStore) -> None:
10399

104100
@pytest.mark.parametrize("mode", ["a", "w"])
105101
async def test_store_open_mode(self, store_kwargs: dict[str, Any], mode: str) -> None:
106-
super().test_store_open_mode(store_kwargs, mode)
102+
await super().test_store_open_mode(store_kwargs, mode)

0 commit comments

Comments
 (0)