Description
What happened?
In a test, we have two datasets, which are essentially the same, but one has two time values and the other has one time value.
When loaded with `xr.open_zarr`, selecting the values at a single time point works with the dataset that has two time values, but raises the following error for the dataset with one time value:
numcodecs/blosc.pyx:365: in numcodecs.blosc.decompress
error: ValueError: buffer source array is read-only
This is the full traceback:
.venv/lib/python3.10/site-packages/xarray/core/dataarray.py:811: in values
return self.variable.values
.venv/lib/python3.10/site-packages/xarray/core/variable.py:554: in values
return _as_array_or_item(self._data)
.venv/lib/python3.10/site-packages/xarray/core/variable.py:352: in _as_array_or_item
data = np.asarray(data)
.venv/lib/python3.10/site-packages/dask/array/core.py:1700: in __array__
x = self.compute()
.venv/lib/python3.10/site-packages/dask/base.py:375: in compute
(result,) = compute(self, traverse=False, **kwargs)
.venv/lib/python3.10/site-packages/dask/base.py:661: in compute
results = schedule(dsk, keys, **kwargs)
.venv/lib/python3.10/site-packages/xarray/core/indexing.py:580: in __array__
return np.asarray(self.get_duck_array(), dtype=dtype)
.venv/lib/python3.10/site-packages/xarray/core/indexing.py:583: in get_duck_array
return self.array.get_duck_array()
.venv/lib/python3.10/site-packages/xarray/core/indexing.py:794: in get_duck_array
return self.array.get_duck_array()
.venv/lib/python3.10/site-packages/xarray/core/indexing.py:657: in get_duck_array
array = self.array[self.key]
.venv/lib/python3.10/site-packages/xarray/backends/zarr.py:166: in __getitem__
return indexing.explicit_indexing_adapter(
.venv/lib/python3.10/site-packages/xarray/core/indexing.py:1018: in explicit_indexing_adapter
result = raw_indexing_method(raw_key.tuple)
.venv/lib/python3.10/site-packages/xarray/backends/zarr.py:156: in _getitem
return self._array[key]
.venv/lib/python3.10/site-packages/zarr/core.py:800: in __getitem__
result = self.get_basic_selection(pure_selection, fields=fields)
.venv/lib/python3.10/site-packages/zarr/core.py:926: in get_basic_selection
return self._get_basic_selection_nd(selection=selection, out=out, fields=fields)
.venv/lib/python3.10/site-packages/zarr/core.py:968: in _get_basic_selection_nd
return self._get_selection(indexer=indexer, out=out, fields=fields)
.venv/lib/python3.10/site-packages/zarr/core.py:1343: in _get_selection
self._chunk_getitems(
.venv/lib/python3.10/site-packages/zarr/core.py:2181: in _chunk_getitems
self._process_chunk(
.venv/lib/python3.10/site-packages/zarr/core.py:2049: in _process_chunk
self._compressor.decode(cdata, dest)
numcodecs/blosc.pyx:564: in numcodecs.blosc.Blosc.decode
???
numcodecs/blosc.pyx:365: in numcodecs.blosc.decompress
???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> ???
E ValueError: buffer source array is read-only
What did you expect to happen?
I expected a single value to be returned in both cases.
Note: If the value passed to `sel` is a list containing a single time value, then the error doesn't occur.
Minimal Complete Verifiable Example
from pathlib import Path
import tempfile
import numpy as np
import pandas as pd
import xarray as xr
# Reproduction script: build two datasets that differ only in the number of
# time steps (one vs. two), round-trip each through zarr, then compare how
# `.sel` behaves with a scalar label versus a single-element list.
times1 = pd.date_range("2019-01-01", periods=1, freq="YS")
times2 = pd.date_range("2019-01-01", periods=2, freq="YS")
# ds1 holds a single value on a one-element time axis; ds2 holds two values.
ds1 = xr.Dataset({"abc": ("time", [1])}, coords={"time": ("time", times1)})
ds2 = xr.Dataset({"abc": ("time", [1, 2])}, coords={"time": ("time", times2)})
# save to zarr and reload
# NOTE(review): the TemporaryDirectory objects are not kept; only their
# generated names are reused as parent paths. The directories themselves may
# be removed once those objects are garbage-collected -- confirm this is fine
# for the reproduction.
temp1 = Path(tempfile.TemporaryDirectory().name) / "temp1.zarr"
temp2 = Path(tempfile.TemporaryDirectory().name) / "temp2.zarr"
ds1.to_zarr(temp1)
ds2.to_zarr(temp2)
ds1_reloaded = xr.open_zarr(temp1)
ds2_reloaded = xr.open_zarr(temp2)
# everything is fine with ds2_reloaded
# (both the scalar label and the single-element list select successfully)
assert ds2_reloaded.sel(time="2019-01-01").abc.values == np.array(1)
assert ds2_reloaded.sel(time=["2019-01-01"]).abc.values == np.array([1])
# this works for ds1_reloaded:
# (the label is passed as a single-element list)
assert ds1_reloaded.sel(time=["2019-01-01"]).abc.values == np.array([1])
# this raises a value error:
# (scalar label on the one-time-step dataset;
#  reported as "ValueError: buffer source array is read-only")
ds1_reloaded.sel(time="2019-01-01").abc.values
# this raises the same error:
ds1_reloaded.sel(time="2019-01-01").as_numpy()
# but this works
# (no `.sel` is applied before converting to numpy)
ds1_reloaded.as_numpy()
MVCE confirmation
- Minimal example — the example is as focused as reasonably possible to demonstrate the underlying issue in xarray.
- Complete example — the example is self-contained, including all data and the text of any traceback.
- Verifiable example — the example copy & pastes into an IPython prompt or Binder notebook, returning the result.
- New issue — a search of GitHub Issues suggests this is not a duplicate.
- Recent environment — the issue occurs with the latest version of xarray and its dependencies.
Relevant log output
# traceback for ValueError from .sel followed by .as_numpy:
ValueError Traceback (most recent call last)
Cell In[58], line 1
----> 1 ds1_reloaded.sel(time="2019-01-01").as_numpy()
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/core/dataset.py:1434, in Dataset.as_numpy(self)
1425 def as_numpy(self) -> Self:
1426 """
1427 Coerces wrapped data and coordinates into numpy arrays, returning a Dataset.
1428
(...)
1432 DataArray.to_numpy : Returns only the data as a numpy.ndarray object.
1433 """
-> 1434 numpy_variables = {k: v.as_numpy() for k, v in self.variables.items()}
1435 return self._replace(variables=numpy_variables)
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/core/dataset.py:1434, in <dictcomp>(.0)
1425 def as_numpy(self) -> Self:
1426 """
1427 Coerces wrapped data and coordinates into numpy arrays, returning a Dataset.
1428
(...)
1432 DataArray.to_numpy : Returns only the data as a numpy.ndarray object.
1433 """
-> 1434 numpy_variables = {k: v.as_numpy() for k, v in self.variables.items()}
1435 return self._replace(variables=numpy_variables)
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/namedarray/core.py:861, in NamedArray.as_numpy(self)
859 def as_numpy(self) -> Self:
860 """Coerces wrapped data into a numpy array, returning a Variable."""
--> 861 return self._replace(data=self.to_numpy())
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/namedarray/core.py:857, in NamedArray.to_numpy(self)
855 """Coerces wrapped data to numpy and returns a numpy.ndarray"""
856 # TODO an entrypoint so array libraries can choose coercion method?
--> 857 return to_numpy(self._data)
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/namedarray/pycompat.py:111, in to_numpy(data, **kwargs)
109 if is_chunked_array(data):
110 chunkmanager = get_chunked_array_type(data)
--> 111 data, *_ = chunkmanager.compute(data, **kwargs)
112 if isinstance(data, array_type("cupy")):
113 data = data.get()
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/namedarray/daskmanager.py:86, in DaskManager.compute(self, *data, **kwargs)
81 def compute(
82 self, *data: Any, **kwargs: Any
83 ) -> tuple[np.ndarray[Any, _DType_co], ...]:
84 from dask.array import compute
---> 86 return compute(*data, **kwargs)
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/dask/base.py:661, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
658 postcomputes.append(x.__dask_postcompute__())
660 with shorten_traceback():
--> 661 results = schedule(dsk, keys, **kwargs)
663 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/core/indexing.py:580, in ImplicitToExplicitIndexingAdapter.__array__(self, dtype, copy)
578 return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy)
579 else:
--> 580 return np.asarray(self.get_duck_array(), dtype=dtype)
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/core/indexing.py:583, in ImplicitToExplicitIndexingAdapter.get_duck_array(self)
582 def get_duck_array(self):
--> 583 return self.array.get_duck_array()
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/core/indexing.py:794, in CopyOnWriteArray.get_duck_array(self)
793 def get_duck_array(self):
--> 794 return self.array.get_duck_array()
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/core/indexing.py:657, in LazilyIndexedArray.get_duck_array(self)
653 array = apply_indexer(self.array, self.key)
654 else:
655 # If the array is not an ExplicitlyIndexedNDArrayMixin,
656 # it may wrap a BackendArray so use its __getitem__
--> 657 array = self.array[self.key]
659 # self.array[self.key] is now a numpy array when
660 # self.array is a BackendArray subclass
661 # and self.key is BasicIndexer((slice(None, None, None),))
662 # so we need the explicit check for ExplicitlyIndexed
663 if isinstance(array, ExplicitlyIndexed):
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/backends/zarr.py:166, in ZarrArrayWrapper.__getitem__(self, key)
164 elif isinstance(key, indexing.OuterIndexer):
165 method = self._oindex
--> 166 return indexing.explicit_indexing_adapter(
167 key, array.shape, indexing.IndexingSupport.VECTORIZED, method
168 )
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/core/indexing.py:1018, in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
996 """Support explicit indexing by delegating to a raw indexing method.
997
998 Outer and/or vectorized indexers are supported by indexing a second time
(...)
1015 Indexing result, in the form of a duck numpy-array.
1016 """
1017 raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
-> 1018 result = raw_indexing_method(raw_key.tuple)
1019 if numpy_indices.tuple:
1020 # index the loaded np.ndarray
1021 indexable = NumpyIndexingAdapter(result)
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/xarray/backends/zarr.py:156, in ZarrArrayWrapper._getitem(self, key)
155 def _getitem(self, key):
--> 156 return self._array[key]
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/zarr/core.py:800, in Array.__getitem__(self, selection)
798 result = self.get_orthogonal_selection(pure_selection, fields=fields)
799 else:
--> 800 result = self.get_basic_selection(pure_selection, fields=fields)
801 return result
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/zarr/core.py:926, in Array.get_basic_selection(self, selection, out, fields)
924 return self._get_basic_selection_zd(selection=selection, out=out, fields=fields)
925 else:
--> 926 return self._get_basic_selection_nd(selection=selection, out=out, fields=fields)
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/zarr/core.py:968, in Array._get_basic_selection_nd(self, selection, out, fields)
962 def _get_basic_selection_nd(self, selection, out=None, fields=None):
963 # implementation of basic selection for array with at least one dimension
964
965 # setup indexer
966 indexer = BasicIndexer(selection, self)
--> 968 return self._get_selection(indexer=indexer, out=out, fields=fields)
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/zarr/core.py:1343, in Array._get_selection(self, indexer, out, fields)
1340 if math.prod(out_shape) > 0:
1341 # allow storage to get multiple items at once
1342 lchunk_coords, lchunk_selection, lout_selection = zip(*indexer)
-> 1343 self._chunk_getitems(
1344 lchunk_coords,
1345 lchunk_selection,
1346 out,
1347 lout_selection,
1348 drop_axes=indexer.drop_axes,
1349 fields=fields,
1350 )
1351 if out.shape:
1352 return out
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/zarr/core.py:2181, in Array._chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, drop_axes, fields)
2179 for ckey, chunk_select, out_select in zip(ckeys, lchunk_selection, lout_selection):
2180 if ckey in cdatas:
-> 2181 self._process_chunk(
2182 out,
2183 cdatas[ckey],
2184 chunk_select,
2185 drop_axes,
2186 out_is_ndarray,
2187 fields,
2188 out_select,
2189 partial_read_decode=partial_read_decode,
2190 )
2191 else:
2192 # check exception type
2193 if self._fill_value is not None:
File ~/Documents/openghg/.venv/lib/python3.10/site-packages/zarr/core.py:2049, in Array._process_chunk(self, out, cdata, chunk_selection, drop_axes, out_is_ndarray, fields, out_selection, partial_read_decode)
2047 if isinstance(cdata, PartialReadBuffer):
2048 cdata = cdata.read_full()
-> 2049 self._compressor.decode(cdata, dest)
2050 else:
2051 if isinstance(cdata, UncompressedPartialReadBufferV3):
File numcodecs/blosc.pyx:564, in numcodecs.blosc.Blosc.decode()
File numcodecs/blosc.pyx:365, in numcodecs.blosc.decompress()
File numcodecs/compat_ext.pyx:16, in numcodecs.compat_ext.Buffer.__cinit__()
ValueError: buffer source array is read-only
Anything else we need to know?
No response
Environment
xarray: 2024.10.0
pandas: 2.2.2
numpy: 1.26.4
scipy: 1.13.0
netCDF4: 1.6.5
pydap: None
h5netcdf: 1.3.0
h5py: 3.11.0
zarr: 2.18.0
cftime: 1.6.3
nc_time_axis: 1.4.1
iris: None
bottleneck: None
dask: 2024.5.0
distributed: None
matplotlib: 3.8.4
cartopy: None
seaborn: None
numbagg: None
fsspec: 2024.3.1
cupy: None
pint: None
sparse: None
flox: None
numpy_groupies: None
setuptools: 69.5.1
pip: 24.0
conda: None
pytest: 8.2.0
mypy: 1.7.1
IPython: 8.24.0
sphinx: 7.4.7