Test that chunk request coalescing reduces calls to store

aldenks · aldenks · commit aa01ae8928ac · 2025-05-01T10:15:31.000-04:00
diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py
@@ -1,5 +1,6 @@
 import pickle
 from typing import Any
+from unittest.mock import AsyncMock
 
 import numpy as np
 import numpy.typing as npt
@@ -9,7 +10,7 @@
 import zarr.api
 import zarr.api.asynchronous
 from zarr import Array
-from zarr.abc.store import Store
+from zarr.abc.store import RangeByteRequest, Store, SuffixByteRequest
 from zarr.codecs import (
     BloscCodec,
     ShardingCodec,
@@ -264,17 +265,24 @@ async def get_with_latency(*args: Any, get_latency: float, **kwargs: Any) -> Any
 
 @pytest.mark.parametrize("index_location", ["start", "end"])
 @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
+@pytest.mark.parametrize("coalesce_reads", [True, False])
 def test_sharding_multiple_chunks_partial_shard_read(
-    store: Store, index_location: ShardingCodecIndexLocation
+    store: Store, index_location: ShardingCodecIndexLocation, coalesce_reads: bool
 ) -> None:
-    array_shape = (8, 64)
-    shard_shape = (4, 32)
+    array_shape = (16, 64)
+    shard_shape = (8, 32)
     chunk_shape = (2, 4)
-
     data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape)
 
+    if coalesce_reads:
+        # 1MiB, enough to coalesce all chunks within a shard in this example
+        zarr.config.set({"sharding.read.coalesce_max_gap_bytes": 2**20})
+    else:
+        zarr.config.set({"sharding.read.coalesce_max_gap_bytes": -1})  # disable coalescing
+
+    store_mock = AsyncMock(wraps=store, spec=store.__class__)
     a = zarr.create_array(
-        StorePath(store),
+        StorePath(store_mock),
         shape=data.shape,
         chunks=chunk_shape,
         shards={"shape": shard_shape, "index_location": index_location},
@@ -284,12 +292,41 @@ def test_sharding_multiple_chunks_partial_shard_read(
     )
     a[:] = data
 
-    # Reads 2.5 (3 full, one partial) chunks each from 2 shards (a subset of both shards)
+    store_mock.reset_mock()  # ignore store calls during array creation
+
+    # Reads 3 (2 full, 1 partial) chunks each from 2 shards (a subset of both shards)
+    # for a total of 6 chunks accessed
     assert np.allclose(a[0, 22:42], np.arange(22, 42, dtype="float32"))
 
-    # Reads 2 chunks from both shards along dimension 0
+    if coalesce_reads:
+        # 2 shard index requests + 2 coalesced chunk data byte ranges (one for each shard)
+        assert store_mock.get.call_count == 4
+    else:
+        # 2 shard index requests + 6 chunks
+        assert store_mock.get.call_count == 8
+
+    for method, args, kwargs in store_mock.method_calls:
+        assert method == "get"
+        assert args[0].startswith("c/")  # get from a chunk
+        assert isinstance(kwargs["byte_range"], (SuffixByteRequest, RangeByteRequest))
+
+    store_mock.reset_mock()
+
+    # Reads 4 chunks each from 2 shards along dimension 0 for a total of 8 chunks accessed
     assert np.allclose(a[:, 0], np.arange(0, data.size, array_shape[1], dtype="float32"))
 
+    if coalesce_reads:
+        # 2 shard index requests + 2 coalesced chunk data byte ranges (one for each shard)
+        assert store_mock.get.call_count == 4
+    else:
+        # 2 shard index requests + 8 chunks
+        assert store_mock.get.call_count == 10
+
+    for method, args, kwargs in store_mock.method_calls:
+        assert method == "get"
+        assert args[0].startswith("c/")  # get from a chunk
+        assert isinstance(kwargs["byte_range"], (SuffixByteRequest, RangeByteRequest))
+
 
 @pytest.mark.parametrize(
     "array_fixture",