Skip to content

Commit 57ac36f

Browse files
committed
Test that chunk request coalescing reduces calls to store
1 parent 5547a21 commit 57ac36f

File tree

1 file changed

+45
-8
lines changed

1 file changed

+45
-8
lines changed

tests/test_codecs/test_sharding.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pickle
22
from typing import Any
3+
from unittest.mock import AsyncMock
34

45
import numpy as np
56
import numpy.typing as npt
@@ -9,7 +10,7 @@
910
import zarr.api
1011
import zarr.api.asynchronous
1112
from zarr import Array
12-
from zarr.abc.store import Store
13+
from zarr.abc.store import RangeByteRequest, Store, SuffixByteRequest
1314
from zarr.codecs import (
1415
BloscCodec,
1516
ShardingCodec,
@@ -249,17 +250,24 @@ def test_partial_shard_read_performance(store: Store) -> None:
249250

250251
@pytest.mark.parametrize("index_location", ["start", "end"])
251252
@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
253+
@pytest.mark.parametrize("coalesce_reads", [True, False])
252254
def test_sharding_multiple_chunks_partial_shard_read(
253-
store: Store, index_location: ShardingCodecIndexLocation
255+
store: Store, index_location: ShardingCodecIndexLocation, coalesce_reads: bool
254256
) -> None:
255-
array_shape = (8, 64)
256-
shard_shape = (4, 32)
257+
array_shape = (16, 64)
258+
shard_shape = (8, 32)
257259
chunk_shape = (2, 4)
258-
259260
data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape)
260261

262+
if coalesce_reads:
263+
# 1MiB, enough to coalesce all chunks within a shard in this example
264+
zarr.config.set({"sharding.read.coalesce_max_gap_bytes": 2**20})
265+
else:
266+
zarr.config.set({"sharding.read.coalesce_max_gap_bytes": -1}) # disable coalescing
267+
268+
store_mock = AsyncMock(wraps=store, spec=store.__class__)
261269
a = zarr.create_array(
262-
StorePath(store),
270+
StorePath(store_mock),
263271
shape=data.shape,
264272
chunks=chunk_shape,
265273
shards={"shape": shard_shape, "index_location": index_location},
@@ -269,12 +277,41 @@ def test_sharding_multiple_chunks_partial_shard_read(
269277
)
270278
a[:] = data
271279

272-
# Reads 2.5 (3 full, one partial) chunks each from 2 shards (a subset of both shards)
280+
store_mock.reset_mock() # ignore store calls during array creation
281+
282+
# Reads 3 (2 full, 1 partial) chunks each from 2 shards (a subset of both shards)
283+
# for a total of 6 chunks accessed
273284
assert np.allclose(a[0, 22:42], np.arange(22, 42, dtype="float32"))
274285

275-
# Reads 2 chunks from both shards along dimension 0
286+
if coalesce_reads:
287+
# 2 shard index requests + 2 coalesced chunk data byte ranges (one for each shard)
288+
assert store_mock.get.call_count == 4
289+
else:
290+
# 2 shard index requests + 6 chunks
291+
assert store_mock.get.call_count == 8
292+
293+
for method, args, kwargs in store_mock.method_calls:
294+
assert method == "get"
295+
assert args[0].startswith("c/") # get from a chunk
296+
assert isinstance(kwargs["byte_range"], (SuffixByteRequest, RangeByteRequest))
297+
298+
store_mock.reset_mock()
299+
300+
# Reads 4 chunks from both shards along dimension 0 for a total of 8 chunks accessed
276301
assert np.allclose(a[:, 0], np.arange(0, data.size, array_shape[1], dtype="float32"))
277302

303+
if coalesce_reads:
304+
# 2 shard index requests + 2 coalesced chunk data byte ranges (one for each shard)
305+
assert store_mock.get.call_count == 4
306+
else:
307+
# 2 shard index requests + 8 chunks
308+
assert store_mock.get.call_count == 10
309+
310+
for method, args, kwargs in store_mock.method_calls:
311+
assert method == "get"
312+
assert args[0].startswith("c/") # get from a chunk
313+
assert isinstance(kwargs["byte_range"], (SuffixByteRequest, RangeByteRequest))
314+
278315

279316
@pytest.mark.parametrize(
280317
"array_fixture",

0 commit comments

Comments
 (0)