Skip to content

Add slc option to Chain.get_draws and get_stats #49

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit on Aug 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mcbackend/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
pass


__version__ = "0.1.3"
__version__ = "0.2.0"
19 changes: 8 additions & 11 deletions mcbackend/backends/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,18 +166,15 @@ def _get_row_at(
result = dict(zip(var_names, data[0][0]))
return result

def _get_rows( # pylint: disable=W0221
def _get_rows(
self,
var_name: str,
nshape: Optional[Sequence[int]],
dtype: str,
*,
burn: int = 0,
slc: slice = slice(None),
) -> numpy.ndarray:
self._commit()
data = self._client.execute(
f"SELECT (`{var_name}`) FROM {self.cid} WHERE _draw_idx>={burn};"
)
data = self._client.execute(f"SELECT (`{var_name}`) FROM {self.cid};")
draws = len(data)

# Safety checks
Expand All @@ -201,20 +198,20 @@ def _get_rows( # pylint: disable=W0221
arr[:] = buffer
return arr
# Otherwise (identical shapes) we can collapse into one ndarray
return numpy.asarray(buffer, dtype=dtype)
return numpy.asarray(buffer, dtype=dtype)[slc]

def get_draws(self, var_name: str) -> numpy.ndarray:
def get_draws(self, var_name: str, slc: slice = slice(None)) -> numpy.ndarray:
var = self.variables[var_name]
nshape = var.shape if not var.undefined_ndim else None
return self._get_rows(var_name, nshape, var.dtype)
return self._get_rows(var_name, nshape, var.dtype, slc)

def get_draws_at(self, idx: int, var_names: Sequence[str]) -> Dict[str, numpy.ndarray]:
return self._get_row_at(idx, var_names)

def get_stats(self, stat_name: str) -> numpy.ndarray:
def get_stats(self, stat_name: str, slc: slice = slice(None)) -> numpy.ndarray:
var = self.sample_stats[stat_name]
nshape = var.shape if not var.undefined_ndim else None
return self._get_rows(f"__stat_{stat_name}", nshape, var.dtype)
return self._get_rows(f"__stat_{stat_name}", nshape, var.dtype, slc)

def get_stats_at(self, idx: int, stat_names: Sequence[str]) -> Dict[str, numpy.ndarray]:
stats = self._get_row_at(idx, [f"__stat_{sname}" for sname in stat_names])
Expand Down
8 changes: 4 additions & 4 deletions mcbackend/backends/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,14 @@ def append(
def __len__(self) -> int:
return self._draw_idx

def get_draws(self, var_name: str) -> numpy.ndarray:
return self._samples[var_name][: self._draw_idx]
def get_draws(self, var_name: str, slc: slice = slice(None)) -> numpy.ndarray:
return self._samples[var_name][: self._draw_idx][slc]

def get_draws_at(self, idx: int, var_names: Sequence[str]) -> Dict[str, numpy.ndarray]:
return {vn: numpy.asarray(self._samples[vn][idx]) for vn in var_names}

def get_stats(self, stat_name: str) -> numpy.ndarray:
return self._stats[stat_name][: self._draw_idx]
def get_stats(self, stat_name: str, slc: slice = slice(None)) -> numpy.ndarray:
return self._stats[stat_name][: self._draw_idx][slc]

def get_stats_at(self, idx: int, stat_names: Sequence[str]) -> Dict[str, numpy.ndarray]:
return {sn: numpy.asarray(self._stats[sn][idx]) for sn in stat_names}
Expand Down
26 changes: 22 additions & 4 deletions mcbackend/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,30 @@ def append(
"""
raise NotImplementedError()

def get_draws(self, var_name: str) -> numpy.ndarray:
"""Retrieve all draws of a variable from an MCMC chain."""
def get_draws(self, var_name: str, slc: slice = slice(None)) -> numpy.ndarray:
"""Retrieve draws of a variable from an MCMC chain.

Parameters
----------
var_name : str
Name of the variable.
slc : slice, optional
Optional ``slice`` object to retrieve only a subset of elements.
Passing this can be more performant than slicing the returned value.
"""
raise NotImplementedError()

def get_stats(self, stat_name: str) -> numpy.ndarray:
"""Retrieve all values of a sampler statistic."""
def get_stats(self, stat_name: str, slc: slice = slice(None)) -> numpy.ndarray:
"""Retrieve values of a sampler statistic.

Parameters
----------
stat_name : str
Name of the stats variable.
slc : slice, optional
Optional ``slice`` object to retrieve only a subset of elements.
Passing this can be more performant than slicing the returned value.
"""
raise NotImplementedError()

def get_draws_at(self, idx: int, var_names: Sequence[str]) -> Dict[str, numpy.ndarray]:
Expand Down
41 changes: 41 additions & 0 deletions mcbackend/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,47 @@ def test__append_get_with_changelings(self, with_stats):
numpy.testing.assert_array_equal(act, exp)
pass

@pytest.mark.parametrize(
"slc",
[
None,
slice(None, None, None),
slice(2, None, None),
slice(2, 10, None),
slice(2, 15, 3),
slice(-8, None, None),
slice(-8, -2, 2),
slice(-50, -2, 2),
slice(15, 10),
],
)
def test__get_slicing(self, slc: slice):
rmeta = make_runmeta(
variables=[Variable("A", "uint8")],
sample_stats=[Variable("B", "uint8")],
data=[],
)
run = self.backend.init_run(rmeta)
chain = run.init_chain(0)

# Generate draws and add them to the chain
N = 20
draws = [dict(A=n) for n in range(N)]
stats = [dict(B=n) for n in range(N)]
for d, s in zip(draws, stats):
chain.append(d, s)
assert len(chain) == N

# slc=None in this test means "don't pass it".
# The implementations should default to slc=slice(None, None, None).
expected = numpy.arange(N, dtype="uint8")[slc or slice(None, None, None)]
kwargs = dict(slc=slc) if slc is not None else {}
act_draws = chain.get_draws("A", **kwargs)
act_stats = chain.get_stats("B", **kwargs)
numpy.testing.assert_array_equal(act_draws, expected)
numpy.testing.assert_array_equal(act_stats, expected)
pass

def test__get_chains(self):
rmeta = make_runmeta()
run = self.backend.init_run(rmeta)
Expand Down