Skip to content

Commit eb25424

Browse files
will-moore, jhamman, and dcherian
authored
Fix json indent (#2546)
* Fix usage of config json_indent in V3JsonEncoder
* Add test for json_indent
* parametrize json indent
* Add None to indent test parameters
* ruff fix
* other ruff fixes
* Update src/zarr/core/metadata/v3.py
  Co-authored-by: Joe Hamman <[email protected]>
* Use explicit json encoder args
* Add types
* Update byte counts for tests
---------
Co-authored-by: Joe Hamman <[email protected]>
Co-authored-by: Deepak Cherian <[email protected]>
1 parent 8bb0b34 commit eb25424

File tree

6 files changed

+53
-23
lines changed

6 files changed

+53
-23
lines changed

docs/user-guide/arrays.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ prints additional diagnostics, e.g.::
209209
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
210210
Compressors : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)
211211
No. bytes : 400000000 (381.5M)
212-
No. bytes stored : 9696302
212+
No. bytes stored : 9696520
213213
Storage ratio : 41.3
214214
Chunks Initialized : 100
215215

@@ -611,7 +611,7 @@ Sharded arrays can be created by providing the ``shards`` parameter to :func:`za
611611
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
612612
Compressors : (ZstdCodec(level=0, checksum=False),)
613613
No. bytes : 100000000 (95.4M)
614-
No. bytes stored : 3981060
614+
No. bytes stored : 3981552
615615
Storage ratio : 25.1
616616
Shards Initialized : 100
617617

docs/user-guide/groups.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ property. E.g.::
113113
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
114114
Compressors : (ZstdCodec(level=0, checksum=False),)
115115
No. bytes : 8000000 (7.6M)
116-
No. bytes stored : 1432
117-
Storage ratio : 5586.6
116+
No. bytes stored : 1614
117+
Storage ratio : 4956.6
118118
Chunks Initialized : 0
119119
>>> baz.info
120120
Type : Array

docs/user-guide/performance.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ ratios, depending on the correlation structure within the data. E.g.::
131131
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
132132
Compressors : (ZstdCodec(level=0, checksum=False),)
133133
No. bytes : 400000000 (381.5M)
134-
No. bytes stored : 342588717
134+
No. bytes stored : 342588911
135135
Storage ratio : 1.2
136136
Chunks Initialized : 100
137137
>>> with zarr.config.set({'array.order': 'F'}):
@@ -150,7 +150,7 @@ ratios, depending on the correlation structure within the data. E.g.::
150150
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
151151
Compressors : (ZstdCodec(level=0, checksum=False),)
152152
No. bytes : 400000000 (381.5M)
153-
No. bytes stored : 342588717
153+
No. bytes stored : 342588911
154154
Storage ratio : 1.2
155155
Chunks Initialized : 100
156156

src/zarr/core/metadata/v3.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from zarr.core.buffer.core import default_buffer_prototype
88

99
if TYPE_CHECKING:
10+
from collections.abc import Callable
1011
from typing import Self
1112

1213
from zarr.core.buffer import Buffer, BufferPrototype
@@ -143,9 +144,30 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
143144

144145

145146
class V3JsonEncoder(json.JSONEncoder):
146-
def __init__(self, *args: Any, **kwargs: Any) -> None:
147-
self.indent = kwargs.pop("indent", config.get("json_indent"))
148-
super().__init__(*args, **kwargs)
147+
def __init__(
148+
self,
149+
*,
150+
skipkeys: bool = False,
151+
ensure_ascii: bool = True,
152+
check_circular: bool = True,
153+
allow_nan: bool = True,
154+
sort_keys: bool = False,
155+
indent: int | None = None,
156+
separators: tuple[str, str] | None = None,
157+
default: Callable[[object], object] | None = None,
158+
) -> None:
159+
if indent is None:
160+
indent = config.get("json_indent")
161+
super().__init__(
162+
skipkeys=skipkeys,
163+
ensure_ascii=ensure_ascii,
164+
check_circular=check_circular,
165+
allow_nan=allow_nan,
166+
sort_keys=sort_keys,
167+
indent=indent,
168+
separators=separators,
169+
default=default,
170+
)
149171

150172
def default(self, o: object) -> Any:
151173
if isinstance(o, np.dtype):

tests/test_array.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -399,27 +399,27 @@ async def test_chunks_initialized() -> None:
399399
def test_nbytes_stored() -> None:
400400
arr = zarr.create(shape=(100,), chunks=(10,), dtype="i4", codecs=[BytesCodec()])
401401
result = arr.nbytes_stored()
402-
assert result == 366 # the size of the metadata document. This is a fragile test.
402+
assert result == 502 # the size of the metadata document. This is a fragile test.
403403
arr[:50] = 1
404404
result = arr.nbytes_stored()
405-
assert result == 566 # the size with 5 chunks filled.
405+
assert result == 702 # the size with 5 chunks filled.
406406
arr[50:] = 2
407407
result = arr.nbytes_stored()
408-
assert result == 766 # the size with all chunks filled.
408+
assert result == 902 # the size with all chunks filled.
409409

410410

411411
async def test_nbytes_stored_async() -> None:
412412
arr = await zarr.api.asynchronous.create(
413413
shape=(100,), chunks=(10,), dtype="i4", codecs=[BytesCodec()]
414414
)
415415
result = await arr.nbytes_stored()
416-
assert result == 366 # the size of the metadata document. This is a fragile test.
416+
assert result == 502 # the size of the metadata document. This is a fragile test.
417417
await arr.setitem(slice(50), 1)
418418
result = await arr.nbytes_stored()
419-
assert result == 566 # the size with 5 chunks filled.
419+
assert result == 702 # the size with 5 chunks filled.
420420
await arr.setitem(slice(50, 100), 2)
421421
result = await arr.nbytes_stored()
422-
assert result == 766 # the size with all chunks filled.
422+
assert result == 902 # the size with all chunks filled.
423423

424424

425425
def test_default_fill_values() -> None:
@@ -537,19 +537,19 @@ def test_info_complete(self, chunks: tuple[int, int], shards: tuple[int, int] |
537537
_serializer=BytesCodec(),
538538
_count_bytes=512,
539539
_count_chunks_initialized=0,
540-
_count_bytes_stored=373 if shards is None else 578, # the metadata?
540+
_count_bytes_stored=521 if shards is None else 982, # the metadata?
541541
)
542542
assert result == expected
543543

544544
arr[:4, :4] = 10
545545
result = arr.info_complete()
546546
if shards is None:
547547
expected = dataclasses.replace(
548-
expected, _count_chunks_initialized=4, _count_bytes_stored=501
548+
expected, _count_chunks_initialized=4, _count_bytes_stored=649
549549
)
550550
else:
551551
expected = dataclasses.replace(
552-
expected, _count_chunks_initialized=1, _count_bytes_stored=774
552+
expected, _count_chunks_initialized=1, _count_bytes_stored=1178
553553
)
554554
assert result == expected
555555

@@ -624,21 +624,20 @@ async def test_info_complete_async(
624624
_serializer=BytesCodec(),
625625
_count_bytes=512,
626626
_count_chunks_initialized=0,
627-
_count_bytes_stored=373 if shards is None else 578, # the metadata?
627+
_count_bytes_stored=521 if shards is None else 982, # the metadata?
628628
)
629629
assert result == expected
630630

631631
await arr.setitem((slice(4), slice(4)), 10)
632632
result = await arr.info_complete()
633633
if shards is None:
634634
expected = dataclasses.replace(
635-
expected, _count_chunks_initialized=4, _count_bytes_stored=501
635+
expected, _count_chunks_initialized=4, _count_bytes_stored=553
636636
)
637637
else:
638638
expected = dataclasses.replace(
639-
expected, _count_chunks_initialized=1, _count_bytes_stored=774
639+
expected, _count_chunks_initialized=1, _count_bytes_stored=1178
640640
)
641-
assert result == expected
642641

643642

644643
@pytest.mark.parametrize("store", ["memory"], indirect=True)

tests/test_metadata/test_v3.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
from zarr.codecs.bytes import BytesCodec
1111
from zarr.core.buffer import default_buffer_prototype
1212
from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding
13-
from zarr.core.group import parse_node_type
13+
from zarr.core.config import config
14+
from zarr.core.group import GroupMetadata, parse_node_type
1415
from zarr.core.metadata.v3 import (
1516
ArrayV3Metadata,
1617
DataType,
@@ -304,6 +305,14 @@ def test_metadata_to_dict(
304305
assert observed == expected
305306

306307

308+
@pytest.mark.parametrize("indent", [2, 4, None])
309+
def test_json_indent(indent: int):
310+
with config.set({"json_indent": indent}):
311+
m = GroupMetadata()
312+
d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes()
313+
assert d == json.dumps(json.loads(d), indent=indent).encode()
314+
315+
307316
# @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897])
308317
# @pytest.mark.parametrize("precision", ["ns", "D"])
309318
# async def test_datetime_metadata(fill_value: int, precision: str) -> None:

0 commit comments

Comments
 (0)