Skip to content

Commit e5135f9

Browse files
Added info for Group and Array (#2400)
* Added group info * Basic array * fixup * docs, tests * fixup * fixup * docstringsx * fixup * chunk info * fixup * wip - split to chunks_initialized * fixup * fixup * update docs * fixup test * lint * fixup test --------- Co-authored-by: Joe Hamman <[email protected]>
1 parent c3beb50 commit e5135f9

File tree

6 files changed

+545
-18
lines changed

6 files changed

+545
-18
lines changed

src/zarr/core/_info.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import dataclasses
2+
import textwrap
3+
from typing import Any, Literal
4+
5+
import numcodecs.abc
6+
import numpy as np
7+
8+
from zarr.abc.codec import Codec
9+
from zarr.core.metadata.v3 import DataType
10+
11+
12+
@dataclasses.dataclass(kw_only=True)
13+
class GroupInfo:
14+
"""
15+
Visual summary for a Group.
16+
17+
Note that this method and its properties is not part of
18+
Zarr's public API.
19+
"""
20+
21+
_name: str
22+
_type: Literal["Group"] = "Group"
23+
_zarr_format: Literal[2, 3]
24+
_read_only: bool
25+
_store_type: str
26+
_count_members: int | None = None
27+
_count_arrays: int | None = None
28+
_count_groups: int | None = None
29+
30+
def __repr__(self) -> str:
31+
template = textwrap.dedent("""\
32+
Name : {_name}
33+
Type : {_type}
34+
Zarr format : {_zarr_format}
35+
Read-only : {_read_only}
36+
Store type : {_store_type}""")
37+
38+
if self._count_members is not None:
39+
template += "\nNo. members : {_count_members}"
40+
if self._count_arrays is not None:
41+
template += "\nNo. arrays : {_count_arrays}"
42+
if self._count_groups is not None:
43+
template += "\nNo. groups : {_count_groups}"
44+
return template.format(**dataclasses.asdict(self))
45+
46+
47+
def human_readable_size(size: int) -> str:
    """
    Format a byte count using binary (power-of-1024) unit suffixes.

    Values below ``2**10`` are returned as the plain integer. Larger values
    are divided by the largest unit that fits and shown with one decimal
    place followed by ``K``, ``M``, ``G``, ``T`` or ``P``. Anything at or
    above ``2**50`` is expressed in ``P``.
    """
    if size < 2**10:
        return f"{size}"
    # (exponent of the unit, suffix); a unit applies while size < 2**(exponent + 10).
    for exponent, suffix in ((10, "K"), (20, "M"), (30, "G"), (40, "T")):
        if size < 2 ** (exponent + 10):
            return f"{size / float(2**exponent):.1f}{suffix}"
    return f"{size / float(2**50):.1f}P"
60+
61+
62+
def byte_info(size: int) -> str:
    """
    Describe a byte count as text.

    Counts of ``2**10`` or more are shown as the exact number followed by
    the ``human_readable_size`` form in parentheses; smaller counts are
    shown as the exact number only.
    """
    if size >= 2**10:
        return f"{size} ({human_readable_size(size)})"
    return str(size)
67+
68+
69+
@dataclasses.dataclass(kw_only=True)
70+
class ArrayInfo:
71+
"""
72+
Visual summary for an Array.
73+
74+
Note that this method and its properties is not part of
75+
Zarr's public API.
76+
"""
77+
78+
_type: Literal["Array"] = "Array"
79+
_zarr_format: Literal[2, 3]
80+
_data_type: np.dtype[Any] | DataType
81+
_shape: tuple[int, ...]
82+
_chunk_shape: tuple[int, ...] | None = None
83+
_order: Literal["C", "F"]
84+
_read_only: bool
85+
_store_type: str
86+
_compressor: numcodecs.abc.Codec | None = None
87+
_filters: tuple[numcodecs.abc.Codec, ...] | None = None
88+
_codecs: list[Codec] | None = None
89+
_count_bytes: int | None = None
90+
_count_bytes_stored: int | None = None
91+
_count_chunks_initialized: int | None = None
92+
93+
def __repr__(self) -> str:
94+
template = textwrap.dedent("""\
95+
Type : {_type}
96+
Zarr format : {_zarr_format}
97+
Data type : {_data_type}
98+
Shape : {_shape}
99+
Chunk shape : {_chunk_shape}
100+
Order : {_order}
101+
Read-only : {_read_only}
102+
Store type : {_store_type}""")
103+
104+
kwargs = dataclasses.asdict(self)
105+
if self._chunk_shape is None:
106+
# for non-regular chunk grids
107+
kwargs["chunk_shape"] = "<variable>"
108+
if self._compressor is not None:
109+
template += "\nCompressor : {_compressor}"
110+
111+
if self._filters is not None:
112+
template += "\nFilters : {_filters}"
113+
114+
if self._codecs is not None:
115+
template += "\nCodecs : {_codecs}"
116+
117+
if self._count_bytes is not None:
118+
template += "\nNo. bytes : {_count_bytes}"
119+
kwargs["_count_bytes"] = byte_info(self._count_bytes)
120+
121+
if self._count_bytes_stored is not None:
122+
template += "\nNo. bytes stored : {_count_bytes_stored}"
123+
kwargs["_count_stored"] = byte_info(self._count_bytes_stored)
124+
125+
if (
126+
self._count_bytes is not None
127+
and self._count_bytes_stored is not None
128+
and self._count_bytes_stored > 0
129+
):
130+
template += "\nStorage ratio : {_storage_ratio}"
131+
kwargs["_storage_ratio"] = f"{self._count_bytes / self._count_bytes_stored:.1f}"
132+
133+
if self._count_chunks_initialized is not None:
134+
template += "\nChunks Initialized : {_count_chunks_initialized}"
135+
return template.format(**kwargs)

src/zarr/core/array.py

Lines changed: 110 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from zarr.abc.store import Store, set_or_delete
1515
from zarr.codecs import _get_default_array_bytes_codec
1616
from zarr.codecs._v2 import V2Codec
17+
from zarr.core._info import ArrayInfo
1718
from zarr.core.attributes import Attributes
1819
from zarr.core.buffer import (
1920
BufferPrototype,
@@ -1332,9 +1333,65 @@ async def update_attributes(self, new_attributes: dict[str, JSON]) -> Self:
13321333
def __repr__(self) -> str:
    # One-line summary built from this array's store path, shape and dtype.
    return f"<AsyncArray {self.store_path} shape={self.shape} dtype={self.dtype}>"
13341335

1335-
async def info(self) -> None:
1336+
@property
def info(self) -> Any:
    """
    Return the statically known information for an array.

    This delegates to ``_info`` without passing any extra values.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    AsyncArray.info_complete
        All information about an array, including dynamic information
        like the number of bytes and chunks written.
    """
    return self._info()
1352+
1353+
async def info_complete(self) -> Any:
    """
    Return the array information together with values computed at call time.

    The number of initialized chunks is obtained by awaiting
    ``nchunks_initialized`` and handed to ``_info`` through ``extra``.
    The number of bytes stored is not implemented yet.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    AsyncArray.info
        The statically known subset of metadata about an array.
    """
    # TODO: get the size of the object from the store.
    extra = {
        "count_chunks_initialized": await self.nchunks_initialized(),
        # count_bytes_stored isn't yet implemented.
    }
    # The unreachable ``raise NotImplementedError`` that used to follow this
    # return was left over from the previous stub and has been removed.
    return self._info(extra=extra)
13371362

1363+
def _info(self, extra: dict[str, int] | None = None) -> Any:
    """
    Build the ``ArrayInfo`` summary for this array.

    Parameters
    ----------
    extra : dict[str, int] or None
        Additional values to include in the summary, keyed by ``ArrayInfo``
        field name. Keys may be given with or without the leading
        underscore used by the ``ArrayInfo`` fields.

    Returns
    -------
    ArrayInfo

    Raises
    ------
    NotImplementedError
        If a Zarr format 3 array uses a chunk grid that is not a
        ``RegularChunkGrid``.
    """
    kwargs: dict[str, Any] = {}
    if self.metadata.zarr_format == 2:
        assert isinstance(self.metadata, ArrayV2Metadata)
        if self.metadata.compressor is not None:
            kwargs["_compressor"] = self.metadata.compressor
        if self.metadata.filters is not None:
            kwargs["_filters"] = self.metadata.filters
        kwargs["_data_type"] = self.metadata.dtype
        kwargs["_chunk_shape"] = self.metadata.chunks
    else:
        kwargs["_codecs"] = self.metadata.codecs
        kwargs["_data_type"] = self.metadata.data_type
        # just regular?
        chunk_grid = self.metadata.chunk_grid
        if isinstance(chunk_grid, RegularChunkGrid):
            kwargs["_chunk_shape"] = chunk_grid.chunk_shape
        else:
            # The f-prefix is required so the grid type is interpolated
            # instead of the braces being printed literally.
            raise NotImplementedError(
                f"'info' is not yet implemented for chunk grids of type {type(chunk_grid)}"
            )

    if extra:
        # Forward caller-supplied values; previously ``extra`` was ignored.
        # Normalise keys such as "count_chunks_initialized" to the
        # underscore-prefixed ArrayInfo field names.
        for key, value in extra.items():
            kwargs[key if key.startswith("_") else f"_{key}"] = value

    return ArrayInfo(
        _zarr_format=self.metadata.zarr_format,
        _shape=self.shape,
        _order=self.order,
        _read_only=self.store_path.store.mode.readonly,
        _store_type=type(self.store_path.store).__name__,
        _count_bytes=self.dtype.itemsize * self.size,
        **kwargs,
    )
1394+
13381395

13391396
# TODO: Array can be a frozen data class again once property setters (e.g. shape) are removed
13401397
@dataclass(frozen=False)
@@ -3099,10 +3156,58 @@ def update_attributes(self, new_attributes: dict[str, JSON]) -> Array:
30993156
def __repr__(self) -> str:
    # One-line summary built from this array's store path, shape and dtype.
    return f"<Array {self.store_path} shape={self.shape} dtype={self.dtype}>"
31013158

3102-
def info(self) -> None:
3103-
return sync(
3104-
self._async_array.info(),
3105-
)
3159+
@property
def info(self) -> Any:
    """
    Return the statically known information for an array.

    This returns the ``info`` property of the wrapped async array.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    Array.info_complete
        All information about an array, including dynamic information
        like the number of bytes and chunks written.

    Examples
    --------
    >>> arr = zarr.create(shape=(10,), chunks=(2,), dtype="float32")
    >>> arr.info
    Type : Array
    Zarr format : 3
    Data type : DataType.float32
    Shape : (10,)
    Chunk shape : (2,)
    Order : C
    Read-only : False
    Store type : MemoryStore
    Codecs : [BytesCodec(endian=<Endian.little: 'little'>)]
    No. bytes : 40
    """
    return self._async_array.info
3190+
3191+
def info_complete(self) -> Any:
    """
    Return all the information about an array, including information from the Store.

    This is the blocking counterpart of the wrapped async array's
    ``info_complete``: the coroutine it returns is run through ``sync``.

    Note that this method will need to read metadata from the store.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    Array.info
        The statically known subset of metadata about an array.
    """
    pending = self._async_array.info_complete()
    return sync(pending)
31063211

31073212

31083213
async def chunks_initialized(

src/zarr/core/group.py

Lines changed: 99 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from zarr._compat import _deprecate_positional_args
1818
from zarr.abc.metadata import Metadata
1919
from zarr.abc.store import Store, set_or_delete
20+
from zarr.core._info import GroupInfo
2021
from zarr.core.array import Array, AsyncArray, _build_parents
2122
from zarr.core.attributes import Attributes
2223
from zarr.core.buffer import default_buffer_prototype
@@ -805,8 +806,72 @@ def attrs(self) -> dict[str, Any]:
805806
return self.metadata.attributes
806807

807808
@property
808-
def info(self) -> None:
809-
raise NotImplementedError
809+
def info(self) -> Any:
    """
    Return a visual representation of the statically known information about a group.

    Member counts are included only when the group's metadata carries
    consolidated metadata; otherwise no member information is passed on,
    since that would require reading from the store.

    Returns
    -------
    GroupInfo

    See Also
    --------
    AsyncGroup.info_complete
        All information about a group, including dynamic information
    """
    consolidated = self.metadata.consolidated_metadata
    members = list(consolidated.flattened_metadata.values()) if consolidated else None
    return self._info(members=members)
831+
832+
async def info_complete(self) -> Any:
    """
    Return all the information for a group.

    The metadata of every member yielded by ``members(max_depth=None)`` is
    collected and passed to ``_info``, so the result includes dynamic
    information like the number of child Groups or Arrays. If this group
    doesn't contain consolidated metadata then this will need to read from
    the backing Store.

    Returns
    -------
    GroupInfo

    See Also
    --------
    AsyncGroup.info
    """
    collected = []
    async for entry in self.members(max_depth=None):
        # Each entry is indexed at position 1 to reach the member object.
        collected.append(entry[1].metadata)
    return self._info(members=collected)
850+
851+
def _info(
    self, members: list[ArrayV2Metadata | ArrayV3Metadata | GroupMetadata] | None = None
) -> Any:
    """
    Build the ``GroupInfo`` summary for this group.

    Parameters
    ----------
    members : list of metadata objects, or None
        Metadata of the group's members. When given, the total number of
        members is reported, along with how many are ``GroupMetadata``
        instances (groups) and how many are not (counted as arrays).
        When ``None``, no counts are included.

    Returns
    -------
    GroupInfo
    """
    kwargs = {}
    if members is not None:
        total = len(members)
        group_total = sum(1 for entry in members if isinstance(entry, GroupMetadata))
        kwargs["_count_members"] = total
        # Everything that is not a group is counted as an array.
        kwargs["_count_arrays"] = total - group_total
        kwargs["_count_groups"] = group_total

    store = self.store_path.store
    return GroupInfo(
        _name=self.store_path.path,
        _read_only=store.mode.readonly,
        _store_type=type(store).__name__,
        _zarr_format=self.metadata.zarr_format,
        # maybe do a typeddict
        **kwargs,  # type: ignore[arg-type]
    )
810875

811876
@property
812877
def store(self) -> Store:
@@ -1454,8 +1519,38 @@ def attrs(self) -> Attributes:
14541519
return Attributes(self)
14551520

14561521
@property
1457-
def info(self) -> None:
1458-
raise NotImplementedError
1522+
def info(self) -> Any:
    """
    Return the statically known information for a group.

    This returns the ``info`` attribute of the wrapped async group.

    Returns
    -------
    GroupInfo

    See Also
    --------
    Group.info_complete
        All information about a group, including dynamic information
        like the children members.
    """
    async_group = self._async_group
    return async_group.info
1537+
1538+
def info_complete(self) -> Any:
    """
    Return information for a group.

    This is the blocking counterpart of the wrapped async group's
    ``info_complete``: the coroutine it returns is run through ``_sync``.

    If this group doesn't contain consolidated metadata then
    this will need to read from the backing Store.

    Returns
    -------
    GroupInfo

    See Also
    --------
    Group.info
    """
    pending = self._async_group.info_complete()
    return self._sync(pending)
14591554

14601555
@property
14611556
def store(self) -> Store:

0 commit comments

Comments
 (0)