Skip to content

Commit 5ff3fbe

Browse files
ilan-gold and dstansby
authored
(fix): use typesize on Blosc codec (#2962)
* (fix): use `typesize` on `Blosc` codec
* (chore): relnote
* (fix): intersphinx
* (fix): look at that compression ratio!
* (fix): add test
* (fix): min version
* (fix): parenthesis?
* (fix): try assertion error
* (fix): windows size
* (fix): add bytes print
* (fix): aghh windows latest is correct, error for non latest
* (fix): conditions for sizes
* (fix): try clearer data
* (fix): awesome!
* (fix): pre-commit

---------

Co-authored-by: David Stansby <[email protected]>
1 parent 0465c2b commit 5ff3fbe

File tree

4 files changed

+26
-2
lines changed

4 files changed

+26
-2
lines changed

changes/2962.fix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Internally pass the `typesize` constructor parameter to :class:`numcodecs.blosc.Blosc`, restoring compression ratios to the levels of the v2 package.

docs/user-guide/arrays.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,8 @@ prints additional diagnostics, e.g.::
209209
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
210210
Compressors : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)
211211
No. bytes : 400000000 (381.5M)
212-
No. bytes stored : 9696520
213-
Storage ratio : 41.3
212+
No. bytes stored : 3558573
213+
Storage ratio : 112.4
214214
Chunks Initialized : 100
215215

216216
.. note::

src/zarr/codecs/blosc.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import numcodecs
1010
from numcodecs.blosc import Blosc
11+
from packaging.version import Version
1112

1213
from zarr.abc.codec import BytesBytesCodec
1314
from zarr.core.buffer.cpu import as_numpy_array_wrapper
@@ -163,6 +164,9 @@ def _blosc_codec(self) -> Blosc:
163164
"shuffle": map_shuffle_str_to_int[self.shuffle],
164165
"blocksize": self.blocksize,
165166
}
167+
# See https://github.com/zarr-developers/numcodecs/pull/713
168+
if Version(numcodecs.__version__) >= Version("0.16.0"):
169+
config_dict["typesize"] = self.typesize
166170
return Blosc.from_config(config_dict)
167171

168172
async def _decode_single(

tests/test_codecs/test_blosc.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import json
22

3+
import numcodecs
34
import numpy as np
45
import pytest
6+
from packaging.version import Version
57

68
import zarr
79
from zarr.abc.store import Store
@@ -54,3 +56,20 @@ async def test_blosc_evolve(store: Store, dtype: str) -> None:
5456
assert blosc_configuration_json["shuffle"] == "bitshuffle"
5557
else:
5658
assert blosc_configuration_json["shuffle"] == "shuffle"
59+
60+
61+
async def test_typesize() -> None:
    """Check the stored size of a Blosc-compressed chunk of ``uint64`` data.

    A one-million-element ``uint64`` ``arange`` is written with
    ``BytesCodec`` + ``BloscCodec`` in chunks of 10000 elements, and the
    byte length of the first stored chunk (key ``"c/0"``) is compared with
    a pinned value. The pinned value depends on the installed numcodecs
    version: ``typesize`` is only forwarded to ``Blosc`` for
    numcodecs >= 0.16.0, which yields a much smaller chunk.
    """
    a = np.arange(1000000, dtype=np.uint64)
    codecs = [zarr.codecs.BytesCodec(), zarr.codecs.BloscCodec()]
    # One-element tuple: ``(10000)`` is just a parenthesised int.
    z = zarr.array(a, chunks=(10000,), codecs=codecs)
    data = await z.store.get("c/0", prototype=default_buffer_prototype())
    assert data is not None
    # Named ``chunk_bytes`` so the builtin ``bytes`` is not shadowed.
    chunk_bytes = data.to_bytes()
    size = len(chunk_bytes)
    # The message reports 20 bytes from each end, matching the slices.
    msg = (
        f"Blosc size mismatch. First 20 bytes: {chunk_bytes[:20]!r} "
        f"and last 20 bytes: {chunk_bytes[-20:]!r}"
    )
    # Older numcodecs releases do not receive ``typesize`` and compress
    # this data far less effectively, hence the two pinned sizes.
    if Version(numcodecs.__version__) >= Version("0.16.0"):
        expected_size = 402
    else:
        expected_size = 10216
    assert size == expected_size, msg

0 commit comments

Comments
 (0)