Skip to content

Commit aa01ae8

Browse files
committed
Test that chunk request coalescing reduces calls to store
1 parent 3df3ca0 commit aa01ae8

File tree

1 file changed

+45
-8
lines changed

1 file changed

+45
-8
lines changed

tests/test_codecs/test_sharding.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pickle
22
from typing import Any
3+
from unittest.mock import AsyncMock
34

45
import numpy as np
56
import numpy.typing as npt
@@ -9,7 +10,7 @@
910
import zarr.api
1011
import zarr.api.asynchronous
1112
from zarr import Array
12-
from zarr.abc.store import Store
13+
from zarr.abc.store import RangeByteRequest, Store, SuffixByteRequest
1314
from zarr.codecs import (
1415
BloscCodec,
1516
ShardingCodec,
@@ -264,17 +265,24 @@ async def get_with_latency(*args: Any, get_latency: float, **kwargs: Any) -> Any
264265

265266
@pytest.mark.parametrize("index_location", ["start", "end"])
266267
@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
268+
@pytest.mark.parametrize("coalesce_reads", [True, False])
267269
def test_sharding_multiple_chunks_partial_shard_read(
268-
store: Store, index_location: ShardingCodecIndexLocation
270+
store: Store, index_location: ShardingCodecIndexLocation, coalesce_reads: bool
269271
) -> None:
270-
array_shape = (8, 64)
271-
shard_shape = (4, 32)
272+
array_shape = (16, 64)
273+
shard_shape = (8, 32)
272274
chunk_shape = (2, 4)
273-
274275
data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape)
275276

277+
if coalesce_reads:
278+
# 1MiB, enough to coalesce all chunks within a shard in this example
279+
zarr.config.set({"sharding.read.coalesce_max_gap_bytes": 2**20})
280+
else:
281+
zarr.config.set({"sharding.read.coalesce_max_gap_bytes": -1}) # disable coalescing
282+
283+
store_mock = AsyncMock(wraps=store, spec=store.__class__)
276284
a = zarr.create_array(
277-
StorePath(store),
285+
StorePath(store_mock),
278286
shape=data.shape,
279287
chunks=chunk_shape,
280288
shards={"shape": shard_shape, "index_location": index_location},
@@ -284,12 +292,41 @@ def test_sharding_multiple_chunks_partial_shard_read(
284292
)
285293
a[:] = data
286294

287-
# Reads 2.5 (3 full, one partial) chunks each from 2 shards (a subset of both shards)
295+
store_mock.reset_mock() # ignore store calls during array creation
296+
297+
# Reads 3 (2 full, 1 partial) chunks each from 2 shards (a subset of both shards)
298+
# for a total of 6 chunks accessed
288299
assert np.allclose(a[0, 22:42], np.arange(22, 42, dtype="float32"))
289300

290-
# Reads 2 chunks from both shards along dimension 0
301+
if coalesce_reads:
302+
# 2 shard index requests + 2 coalesced chunk data byte ranges (one for each shard)
303+
assert store_mock.get.call_count == 4
304+
else:
305+
# 2 shard index requests + 6 chunks
306+
assert store_mock.get.call_count == 8
307+
308+
for method, args, kwargs in store_mock.method_calls:
309+
assert method == "get"
310+
assert args[0].startswith("c/") # get from a chunk
311+
assert isinstance(kwargs["byte_range"], (SuffixByteRequest, RangeByteRequest))
312+
313+
store_mock.reset_mock()
314+
315+
# Reads 4 chunks from both shards along dimension 0 for a total of 8 chunks accessed
291316
assert np.allclose(a[:, 0], np.arange(0, data.size, array_shape[1], dtype="float32"))
292317

318+
if coalesce_reads:
319+
# 2 shard index requests + 2 coalesced chunk data byte ranges (one for each shard)
320+
assert store_mock.get.call_count == 4
321+
else:
322+
# 2 shard index requests + 8 chunks
323+
assert store_mock.get.call_count == 10
324+
325+
for method, args, kwargs in store_mock.method_calls:
326+
assert method == "get"
327+
assert args[0].startswith("c/") # get from a chunk
328+
assert isinstance(kwargs["byte_range"], (SuffixByteRequest, RangeByteRequest))
329+
293330

294331
@pytest.mark.parametrize(
295332
"array_fixture",

0 commit comments

Comments
 (0)