Skip to content

Commit 3746666

Browse files
authored
Refactor smc out of step methods (#3579)
* move smc from step_methods to its own family * black * update notebooks * add release note and fix lint * add release note and fix lint * minor fix docstring * reorder arguments and minor fix docstring
1 parent 24730cc commit 3746666

File tree

13 files changed

+481
-740
lines changed

13 files changed

+481
-740
lines changed

RELEASE-NOTES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
- Moved math operations out of `Rice`, `TruncatedNormal`, `Triangular` and `ZeroInflatedNegativeBinomial` `random` methods. Math operations on values returned by `draw_values` might not broadcast well, and all the `size` aware broadcasting is left to `generate_samples`. Fixes [#3481](https://github.com/pymc-devs/pymc3/issues/3481) and [#3508](https://github.com/pymc-devs/pymc3/issues/3508)
1515
- Parallelization of population steppers (`DEMetropolis`) is now set via the `cores` argument. ([#3559](https://github.com/pymc-devs/pymc3/pull/3559))
1616
- SMC: stabilize covariance matrix [3573](https://github.com/pymc-devs/pymc3/pull/3573)
17+
- SMC is no longer a step method of `pm.sample` now it should be called using `pm.sample_smc` [3579](https://github.com/pymc-devs/pymc3/pull/3579)
1718

1819
## PyMC3 3.7 (May 29 2019)
1920

docs/source/notebooks/ODE_parameter_estimation.ipynb

Lines changed: 117 additions & 179 deletions
Large diffs are not rendered by default.

docs/source/notebooks/SMC-ABC_Lotka-Volterra_example.ipynb

Lines changed: 87 additions & 52 deletions
Large diffs are not rendered by default.

docs/source/notebooks/SMC2_gaussians.ipynb

Lines changed: 8 additions & 9 deletions
Large diffs are not rendered by default.

pymc3/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from .stats import *
1212
from .sampling import *
1313
from .step_methods import *
14+
from .smc import *
1415
from .theanof import *
1516
from .tuning import *
1617
from .variational import *

pymc3/parallel_sampling.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,3 +425,22 @@ def __exit__(self, *args):
425425
ProcessAdapter.terminate_all(self._samplers)
426426
if self._progress is not None:
427427
self._progress.close()
428+
429+
def _cpu_count():
430+
"""Try to guess the number of CPUs in the system.
431+
432+
We use the number provided by psutil if that is installed.
433+
If not, we use the number provided by multiprocessing, but assume
434+
that half of the cpus are only hardware threads and ignore those.
435+
"""
436+
try:
437+
import psutil
438+
cpus = psutil.cpu_count(False)
439+
except ImportError:
440+
try:
441+
cpus = multiprocessing.cpu_count() // 2
442+
except NotImplementedError:
443+
cpus = 1
444+
if cpus is None:
445+
cpus = 1
446+
return cpus

pymc3/sampling.py

Lines changed: 131 additions & 165 deletions
Large diffs are not rendered by default.

pymc3/smc/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .smc import sample_smc

pymc3/step_methods/smc.py renamed to pymc3/smc/smc.py

Lines changed: 102 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import multiprocessing as mp
88
import warnings
99

10-
from .metropolis import MultivariateNormalProposal
10+
from ..step_methods.metropolis import MultivariateNormalProposal
1111
from .smc_utils import (
1212
_initial_population,
1313
_calc_covariance,
@@ -20,54 +20,96 @@
2020
)
2121
from ..theanof import inputvars, make_shared_replacements
2222
from ..model import modelcontext
23+
from ..parallel_sampling import _cpu_count
2324

2425

2526
EXPERIMENTAL_WARNING = (
2627
"Warning: SMC-ABC methods are experimental step methods and not yet"
2728
" recommended for use in PyMC3!"
2829
)
2930

30-
__all__ = ["SMC", "sample_smc"]
31+
__all__ = ["sample_smc"]
3132

3233

33-
class SMC:
34-
r"""
35-
Sequential Monte Carlo step
34+
def sample_smc(
35+
draws=1000,
36+
kernel="metropolis",
37+
n_steps=25,
38+
parallel=False,
39+
start=None,
40+
cores=None,
41+
tune_scaling=True,
42+
tune_steps=True,
43+
scaling=1.0,
44+
p_acc_rate=0.99,
45+
threshold=0.5,
46+
epsilon=1.0,
47+
dist_func="absolute_error",
48+
sum_stat=False,
49+
progressbar=False,
50+
model=None,
51+
random_seed=-1,
52+
):
53+
"""
54+
Sequential Monte Carlo based sampling
3655
3756
Parameters
3857
----------
58+
draws : int
59+
The number of samples to draw from the posterior (i.e. last stage). And also the number of
60+
independent chains. Defaults to 1000.
61+
kernel : str
62+
Kernel method for the SMC sampler. Available option are ``metropolis`` (default) and `ABC`.
63+
Use `ABC` for likelihood free inference togheter with a ``pm.Simulator``.
3964
n_steps : int
40-
The number of steps of a Markov Chain. If `tune_steps == True` `n_steps` will be used for
41-
the first stage and the number of steps of the other stages will be determined
42-
automatically based on the acceptance rate and `p_acc_rate`.
43-
The number of steps will never be larger than `n_steps`.
44-
scaling : float
45-
Factor applied to the proposal distribution i.e. the step size of the Markov Chain. Only
46-
works if `tune_scaling == False` otherwise is determined automatically.
47-
p_acc_rate : float
48-
Used to compute `n_steps` when `tune_steps == True`. The higher the value of `p_acc_rate`
49-
the higher the number of steps computed automatically. Defaults to 0.99. It should be
50-
between 0 and 1.
65+
The number of steps of each Markov Chain. If ``tune_steps == True`` ``n_steps`` will be used
66+
for the first stage and for the others it will be determined automatically based on the
67+
acceptance rate and `p_acc_rate`, the max number of steps is ``n_steps``.
68+
parallel : bool
69+
Distribute computations across cores if the number of cores is larger than 1.
70+
Defaults to False.
71+
start : dict, or array of dict
72+
Starting point in parameter space. It should be a list of dict with length `chains`.
73+
When None (default) the starting point is sampled from the prior distribution.
74+
cores : int
75+
The number of chains to run in parallel. If ``None`` (default), it will be automatically
76+
set to the number of CPUs in the system.
5177
tune_scaling : bool
52-
Whether to compute the scaling automatically or not. Defaults to True
78+
Whether to compute the scaling factor automatically or not. Defaults to True
5379
tune_steps : bool
5480
Whether to compute the number of steps automatically or not. Defaults to True
81+
scaling : float
82+
Scaling factor applied to the proposal distribution i.e. the step size of the Markov Chain.
83+
If ``tune_scaling == True`` (defaults) it will be determined automatically at each stage.
84+
p_acc_rate : float
85+
Used to compute ``n_steps`` when ``tune_steps == True``. The higher the value of
86+
``p_acc_rate`` the higher the number of steps computed automatically. Defaults to 0.99.
87+
It should be between 0 and 1.
5588
threshold : float
5689
Determines the change of beta from stage to stage, i.e.indirectly the number of stages,
5790
the higher the value of `threshold` the higher the number of stages. Defaults to 0.5.
5891
It should be between 0 and 1.
59-
parallel : bool
60-
Distribute computations across cores if the number of cores is larger than 1
61-
(see `pm.sample()` for details). Defaults to True.
62-
model : :class:`pymc3.Model`
63-
Optional model for sampling step. Defaults to None (taken from context).
92+
epsilon : float
93+
Standard deviation of the gaussian pseudo likelihood. Only works with `kernel = ABC`
94+
dist_func : str
95+
Distance function. Available options are ``absolute_error`` (default) and
96+
``sum_of_squared_distance``. Only works with ``kernel = ABC``
97+
sum_stat : bool
98+
Whether to use or not a summary statistics. Defaults to False. Only works with
99+
``kernel = ABC``
100+
progressbar : bool
101+
Flag for displaying a progress bar. Defaults to False.
102+
model : Model (optional if in ``with`` context)).
103+
random_seed : int
104+
random seed
64105
65106
Notes
66107
-----
67-
SMC works by moving through successive stages. At each stage the inverse temperature \beta is
68-
increased a little bit (starting from 0 up to 1). When \beta = 0 we have the prior distribution
69-
and when \beta =1 we have the posterior distribution. So in more general terms we are always
70-
computing samples from a tempered posterior that we can write as:
108+
SMC works by moving through successive stages. At each stage the inverse temperature
109+
:math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta` = 0
110+
we have the prior distribution and when :math:`\beta` =1 we have the posterior distribution.
111+
So in more general terms we are always computing samples from a tempered posterior that we can
112+
write as:
71113
72114
.. math::
73115
@@ -76,10 +118,10 @@ class SMC:
76118
A summary of the algorithm is:
77119
78120
1. Initialize :math:`\beta` at zero and stage at zero.
79-
2. Generate N samples :math:`S_{\beta}` from the prior (because when :math `\beta = 0` the tempered posterior
80-
is the prior).
81-
3. Increase :math:`\beta` in order to make the effective sample size equals some predefined value
82-
(we use :math:`Nt`, where :math:`t` is 0.5 by default).
121+
2. Generate N samples :math:`S_{\beta}` from the prior (because when :math `\beta = 0` the
122+
tempered posterior is the prior).
123+
3. Increase :math:`\beta` in order to make the effective sample size equals some predefined
124+
value (we use :math:`Nt`, where :math:`t` is 0.5 by default).
83125
4. Compute a set of N importance weights W. The weights are computed as the ratio of the
84126
likelihoods of a sample at stage i+1 and stage i.
85127
5. Obtain :math:`S_{w}` by re-sampling according to W.
@@ -106,69 +148,20 @@ class SMC:
106148
%282007%29133:7%28816%29>`__
107149
"""
108150

109-
def __init__(
110-
self,
111-
n_steps=25,
112-
scaling=1.0,
113-
p_acc_rate=0.99,
114-
tune_scaling=True,
115-
tune_steps=True,
116-
threshold=0.5,
117-
parallel=True,
118-
ABC=False,
119-
epsilon=1,
120-
dist_func="absolute_error",
121-
sum_stat=False,
122-
):
123-
124-
self.n_steps = n_steps
125-
self.max_steps = n_steps
126-
self.scaling = scaling
127-
self.p_acc_rate = 1 - p_acc_rate
128-
self.tune_scaling = tune_scaling
129-
self.tune_steps = tune_steps
130-
self.threshold = threshold
131-
self.parallel = parallel
132-
self.ABC = ABC
133-
self.epsilon = epsilon
134-
self.dist_func = dist_func
135-
self.sum_stat = sum_stat
136-
137-
138-
def sample_smc(
139-
draws=5000, step=None, start=None, cores=None, progressbar=False, model=None, random_seed=-1
140-
):
141-
"""
142-
Sequential Monte Carlo sampling
143-
144-
Parameters
145-
----------
146-
draws : int
147-
The number of samples to draw from the posterior (i.e. last stage). And also the number of
148-
independent Markov Chains. Defaults to 5000.
149-
step : :class:`SMC`
150-
SMC initialization object
151-
start : dict, or array of dict
152-
Starting point in parameter space. It should be a list of dict with length `chains`.
153-
cores : int
154-
The number of chains to run in parallel.
155-
progressbar : bool
156-
Flag for displaying a progress bar
157-
model : pymc3 Model
158-
optional if in `with` context
159-
random_seed : int
160-
random seed
161-
"""
162151
model = modelcontext(model)
163152

164153
if random_seed != -1:
165154
np.random.seed(random_seed)
166155

156+
if cores is None:
157+
cores = _cpu_count()
158+
167159
beta = 0
168160
stage = 0
169161
accepted = 0
170162
acc_rate = 1.0
171-
proposed = draws * step.n_steps
163+
max_steps = n_steps
164+
proposed = draws * n_steps
172165
model.marginal_likelihood = 1
173166
variables = inputvars(model.vars)
174167
discrete = np.concatenate([[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in variables])
@@ -181,33 +174,29 @@ def sample_smc(
181174

182175
posterior, var_info = _initial_population(draws, model, variables, start)
183176

184-
if step.ABC:
177+
if kernel.lower() == "abc":
185178
warnings.warn(EXPERIMENTAL_WARNING)
186179
simulator = model.observed_RVs[0]
187180
likelihood_logp = PseudoLikelihood(
188-
step.epsilon,
181+
epsilon,
189182
simulator.observations,
190183
simulator.distribution.function,
191184
model,
192185
var_info,
193-
step.dist_func,
194-
step.sum_stat,
186+
dist_func,
187+
sum_stat,
195188
)
196-
else:
189+
elif kernel.lower() == "metropolis":
197190
likelihood_logp = logp_forw([model.datalogpt], variables, shared)
198191

199192
while beta < 1:
200-
201-
if step.parallel and cores > 1:
193+
if parallel and cores > 1:
202194
pool = mp.Pool(processes=cores)
203-
results = pool.starmap(
204-
likelihood_logp,
205-
[(sample,) for sample in posterior],
206-
)
195+
results = pool.starmap(likelihood_logp, [(sample,) for sample in posterior])
207196
else:
208197
results = [likelihood_logp(sample) for sample in posterior]
209198
likelihoods = np.array(results).squeeze()
210-
beta, old_beta, weights, sj = calc_beta(beta, likelihoods, step.threshold)
199+
beta, old_beta, weights, sj = calc_beta(beta, likelihoods, threshold)
211200
model.marginal_likelihood *= sj
212201
# resample based on plausibility weights (selection)
213202
resampling_indexes = np.random.choice(np.arange(draws), size=draws, p=weights)
@@ -220,31 +209,39 @@ def sample_smc(
220209

221210
# compute scaling (optional) and number of Markov chains steps (optional), based on the
222211
# acceptance rate of the previous stage
223-
if (step.tune_scaling or step.tune_steps) and stage > 0:
224-
_tune(acc_rate, proposed, step)
212+
if (tune_scaling or tune_steps) and stage > 0:
213+
scaling, n_steps = _tune(
214+
acc_rate,
215+
proposed,
216+
tune_scaling,
217+
tune_steps,
218+
scaling,
219+
n_steps,
220+
max_steps,
221+
p_acc_rate,
222+
)
225223

226-
pm._log.info("Stage: {:d} Beta: {:.3f} Steps: {:d}".format(stage, beta, step.n_steps))
224+
pm._log.info("Stage: {:d} Beta: {:.3f} Steps: {:d}".format(stage, beta, n_steps))
227225
# Apply Metropolis kernel (mutation)
228-
proposed = draws * step.n_steps
226+
proposed = draws * n_steps
229227
priors = np.array([prior_logp(sample) for sample in posterior]).squeeze()
230228
tempered_logp = priors + likelihoods * beta
231-
deltas = np.squeeze(proposal(step.n_steps) * step.scaling)
229+
deltas = np.squeeze(proposal(n_steps) * scaling)
232230

233231
parameters = (
234232
proposal,
235-
step.scaling,
233+
scaling,
236234
accepted,
237235
any_discrete,
238236
all_discrete,
239237
discrete,
240-
step.n_steps,
238+
n_steps,
241239
prior_logp,
242240
likelihood_logp,
243241
beta,
244-
step.ABC,
245242
)
246243

247-
if step.parallel and cores > 1:
244+
if parallel and cores > 1:
248245
pool = mp.Pool(processes=cores)
249246
results = pool.starmap(
250247
metrop_kernel,

0 commit comments

Comments
 (0)