@@ -198,6 +198,7 @@ def test_sharding_partial_read(
198
198
assert np .all (read_data == 1 )
199
199
200
200
201
+ @pytest .mark .skip ("This is profiling rather than a test" )
201
202
@pytest .mark .slow_hypothesis
202
203
@pytest .mark .parametrize ("store" , ["local" ], indirect = ["store" ])
203
204
def test_partial_shard_read_performance (store : Store ) -> None :
@@ -231,10 +232,18 @@ def test_partial_shard_read_performance(store: Store) -> None:
231
232
232
233
num_calls = 20
233
234
experiments = []
234
- for concurrency , get_latency , statement in product (
235
- [1 , 10 , 100 ], [0.0 , 0.01 ], ["a[0, :, :]" , "a[:, 0, :]" , "a[:, :, 0]" ]
235
+ for concurrency , get_latency , coalesce_max_gap , statement in product (
236
+ [1 , 10 , 100 ],
237
+ [0.0 , 0.01 ],
238
+ [- 1 , 2 ** 20 , 10 * 2 ** 20 ],
239
+ ["a[0, :, :]" , "a[:, 0, :]" , "a[:, :, 0]" ],
236
240
):
237
- zarr .config .set ({"async.concurrency" : concurrency })
241
+ zarr .config .set (
242
+ {
243
+ "async.concurrency" : concurrency ,
244
+ "sharding.read.coalesce_max_gap_bytes" : coalesce_max_gap ,
245
+ }
246
+ )
238
247
239
248
async def get_with_latency (* args : Any , get_latency : float , ** kwargs : Any ) -> Any :
240
249
await asyncio .sleep (get_latency )
@@ -252,14 +261,15 @@ async def get_with_latency(*args: Any, get_latency: float, **kwargs: Any) -> Any
252
261
experiments .append (
253
262
{
254
263
"concurrency" : concurrency ,
255
- "statement " : statement ,
264
+ "coalesce_max_gap " : coalesce_max_gap ,
256
265
"get_latency" : get_latency ,
266
+ "statement" : statement ,
257
267
"time" : time ,
258
268
"store_get_calls" : store_mock .get .call_count ,
259
269
}
260
270
)
261
271
262
- with open ("zarr-python-partial-shard-read-performance.json" , "w" ) as f :
272
+ with open ("zarr-python-partial-shard-read-performance-with-coalesce .json" , "w" ) as f :
263
273
json .dump (experiments , f )
264
274
265
275
0 commit comments