7
7
import multiprocessing as mp
8
8
import warnings
9
9
10
- from .metropolis import MultivariateNormalProposal
10
+ from .. step_methods . metropolis import MultivariateNormalProposal
11
11
from .smc_utils import (
12
12
_initial_population ,
13
13
_calc_covariance ,
20
20
)
21
21
from ..theanof import inputvars , make_shared_replacements
22
22
from ..model import modelcontext
23
+ from ..parallel_sampling import _cpu_count
23
24
24
25
25
26
EXPERIMENTAL_WARNING = (
26
27
"Warning: SMC-ABC methods are experimental step methods and not yet"
27
28
" recommended for use in PyMC3!"
28
29
)
29
30
30
- __all__ = ["SMC" , " sample_smc" ]
31
+ __all__ = ["sample_smc" ]
31
32
32
33
33
- class SMC :
34
- r"""
35
- Sequential Monte Carlo step
34
+ def sample_smc (
35
+ draws = 1000 ,
36
+ kernel = "metropolis" ,
37
+ n_steps = 25 ,
38
+ parallel = False ,
39
+ start = None ,
40
+ cores = None ,
41
+ tune_scaling = True ,
42
+ tune_steps = True ,
43
+ scaling = 1.0 ,
44
+ p_acc_rate = 0.99 ,
45
+ threshold = 0.5 ,
46
+ epsilon = 1.0 ,
47
+ dist_func = "absolute_error" ,
48
+ sum_stat = False ,
49
+ progressbar = False ,
50
+ model = None ,
51
+ random_seed = - 1 ,
52
+ ):
53
+ """
54
+ Sequential Monte Carlo based sampling
36
55
37
56
Parameters
38
57
----------
58
+ draws : int
59
+ The number of samples to draw from the posterior (i.e. last stage). And also the number of
60
+ independent chains. Defaults to 1000.
61
+ kernel : str
62
+ Kernel method for the SMC sampler. Available option are ``metropolis`` (default) and `ABC`.
63
+ Use `ABC` for likelihood free inference togheter with a ``pm.Simulator``.
39
64
n_steps : int
40
- The number of steps of a Markov Chain. If `tune_steps == True` `n_steps` will be used for
41
- the first stage and the number of steps of the other stages will be determined
42
- automatically based on the acceptance rate and `p_acc_rate`.
43
- The number of steps will never be larger than `n_steps`.
44
- scaling : float
45
- Factor applied to the proposal distribution i.e. the step size of the Markov Chain. Only
46
- works if `tune_scaling == False` otherwise is determined automatically.
47
- p_acc_rate : float
48
- Used to compute `n_steps` when `tune_steps == True`. The higher the value of `p_acc_rate`
49
- the higher the number of steps computed automatically. Defaults to 0.99. It should be
50
- between 0 and 1.
65
+ The number of steps of each Markov Chain. If ``tune_steps == True`` ``n_steps`` will be used
66
+ for the first stage and for the others it will be determined automatically based on the
67
+ acceptance rate and `p_acc_rate`, the max number of steps is ``n_steps``.
68
+ parallel : bool
69
+ Distribute computations across cores if the number of cores is larger than 1.
70
+ Defaults to False.
71
+ start : dict, or array of dict
72
+ Starting point in parameter space. It should be a list of dict with length `chains`.
73
+ When None (default) the starting point is sampled from the prior distribution.
74
+ cores : int
75
+ The number of chains to run in parallel. If ``None`` (default), it will be automatically
76
+ set to the number of CPUs in the system.
51
77
tune_scaling : bool
52
- Whether to compute the scaling automatically or not. Defaults to True
78
+ Whether to compute the scaling factor automatically or not. Defaults to True
53
79
tune_steps : bool
54
80
Whether to compute the number of steps automatically or not. Defaults to True
81
+ scaling : float
82
+ Scaling factor applied to the proposal distribution i.e. the step size of the Markov Chain.
83
+ If ``tune_scaling == True`` (defaults) it will be determined automatically at each stage.
84
+ p_acc_rate : float
85
+ Used to compute ``n_steps`` when ``tune_steps == True``. The higher the value of
86
+ ``p_acc_rate`` the higher the number of steps computed automatically. Defaults to 0.99.
87
+ It should be between 0 and 1.
55
88
threshold : float
56
89
Determines the change of beta from stage to stage, i.e.indirectly the number of stages,
57
90
the higher the value of `threshold` the higher the number of stages. Defaults to 0.5.
58
91
It should be between 0 and 1.
59
- parallel : bool
60
- Distribute computations across cores if the number of cores is larger than 1
61
- (see `pm.sample()` for details). Defaults to True.
62
- model : :class:`pymc3.Model`
63
- Optional model for sampling step. Defaults to None (taken from context).
92
+ epsilon : float
93
+ Standard deviation of the gaussian pseudo likelihood. Only works with `kernel = ABC`
94
+ dist_func : str
95
+ Distance function. Available options are ``absolute_error`` (default) and
96
+ ``sum_of_squared_distance``. Only works with ``kernel = ABC``
97
+ sum_stat : bool
98
+ Whether to use or not a summary statistics. Defaults to False. Only works with
99
+ ``kernel = ABC``
100
+ progressbar : bool
101
+ Flag for displaying a progress bar. Defaults to False.
102
+ model : Model (optional if in ``with`` context)).
103
+ random_seed : int
104
+ random seed
64
105
65
106
Notes
66
107
-----
67
- SMC works by moving through successive stages. At each stage the inverse temperature \beta is
68
- increased a little bit (starting from 0 up to 1). When \beta = 0 we have the prior distribution
69
- and when \beta =1 we have the posterior distribution. So in more general terms we are always
70
- computing samples from a tempered posterior that we can write as:
108
+ SMC works by moving through successive stages. At each stage the inverse temperature
109
+ :math:`\b eta` is increased a little bit (starting from 0 up to 1). When :math:`\b eta` = 0
110
+ we have the prior distribution and when :math:`\b eta` =1 we have the posterior distribution.
111
+ So in more general terms we are always computing samples from a tempered posterior that we can
112
+ write as:
71
113
72
114
.. math::
73
115
@@ -76,10 +118,10 @@ class SMC:
76
118
A summary of the algorithm is:
77
119
78
120
1. Initialize :math:`\b eta` at zero and stage at zero.
79
- 2. Generate N samples :math:`S_{\beta}` from the prior (because when :math `\beta = 0` the tempered posterior
80
- is the prior).
81
- 3. Increase :math:`\beta` in order to make the effective sample size equals some predefined value
82
- (we use :math:`Nt`, where :math:`t` is 0.5 by default).
121
+ 2. Generate N samples :math:`S_{\b eta}` from the prior (because when :math `\b eta = 0` the
122
+ tempered posterior is the prior).
123
+ 3. Increase :math:`\b eta` in order to make the effective sample size equals some predefined
124
+ value (we use :math:`Nt`, where :math:`t` is 0.5 by default).
83
125
4. Compute a set of N importance weights W. The weights are computed as the ratio of the
84
126
likelihoods of a sample at stage i+1 and stage i.
85
127
5. Obtain :math:`S_{w}` by re-sampling according to W.
@@ -106,69 +148,20 @@ class SMC:
106
148
%282007%29133:7%28816%29>`__
107
149
"""
108
150
109
- def __init__ (
110
- self ,
111
- n_steps = 25 ,
112
- scaling = 1.0 ,
113
- p_acc_rate = 0.99 ,
114
- tune_scaling = True ,
115
- tune_steps = True ,
116
- threshold = 0.5 ,
117
- parallel = True ,
118
- ABC = False ,
119
- epsilon = 1 ,
120
- dist_func = "absolute_error" ,
121
- sum_stat = False ,
122
- ):
123
-
124
- self .n_steps = n_steps
125
- self .max_steps = n_steps
126
- self .scaling = scaling
127
- self .p_acc_rate = 1 - p_acc_rate
128
- self .tune_scaling = tune_scaling
129
- self .tune_steps = tune_steps
130
- self .threshold = threshold
131
- self .parallel = parallel
132
- self .ABC = ABC
133
- self .epsilon = epsilon
134
- self .dist_func = dist_func
135
- self .sum_stat = sum_stat
136
-
137
-
138
- def sample_smc (
139
- draws = 5000 , step = None , start = None , cores = None , progressbar = False , model = None , random_seed = - 1
140
- ):
141
- """
142
- Sequential Monte Carlo sampling
143
-
144
- Parameters
145
- ----------
146
- draws : int
147
- The number of samples to draw from the posterior (i.e. last stage). And also the number of
148
- independent Markov Chains. Defaults to 5000.
149
- step : :class:`SMC`
150
- SMC initialization object
151
- start : dict, or array of dict
152
- Starting point in parameter space. It should be a list of dict with length `chains`.
153
- cores : int
154
- The number of chains to run in parallel.
155
- progressbar : bool
156
- Flag for displaying a progress bar
157
- model : pymc3 Model
158
- optional if in `with` context
159
- random_seed : int
160
- random seed
161
- """
162
151
model = modelcontext (model )
163
152
164
153
if random_seed != - 1 :
165
154
np .random .seed (random_seed )
166
155
156
+ if cores is None :
157
+ cores = _cpu_count ()
158
+
167
159
beta = 0
168
160
stage = 0
169
161
accepted = 0
170
162
acc_rate = 1.0
171
- proposed = draws * step .n_steps
163
+ max_steps = n_steps
164
+ proposed = draws * n_steps
172
165
model .marginal_likelihood = 1
173
166
variables = inputvars (model .vars )
174
167
discrete = np .concatenate ([[v .dtype in pm .discrete_types ] * (v .dsize or 1 ) for v in variables ])
@@ -181,33 +174,29 @@ def sample_smc(
181
174
182
175
posterior , var_info = _initial_population (draws , model , variables , start )
183
176
184
- if step . ABC :
177
+ if kernel . lower () == "abc" :
185
178
warnings .warn (EXPERIMENTAL_WARNING )
186
179
simulator = model .observed_RVs [0 ]
187
180
likelihood_logp = PseudoLikelihood (
188
- step . epsilon ,
181
+ epsilon ,
189
182
simulator .observations ,
190
183
simulator .distribution .function ,
191
184
model ,
192
185
var_info ,
193
- step . dist_func ,
194
- step . sum_stat ,
186
+ dist_func ,
187
+ sum_stat ,
195
188
)
196
- else :
189
+ elif kernel . lower () == "metropolis" :
197
190
likelihood_logp = logp_forw ([model .datalogpt ], variables , shared )
198
191
199
192
while beta < 1 :
200
-
201
- if step .parallel and cores > 1 :
193
+ if parallel and cores > 1 :
202
194
pool = mp .Pool (processes = cores )
203
- results = pool .starmap (
204
- likelihood_logp ,
205
- [(sample ,) for sample in posterior ],
206
- )
195
+ results = pool .starmap (likelihood_logp , [(sample ,) for sample in posterior ])
207
196
else :
208
197
results = [likelihood_logp (sample ) for sample in posterior ]
209
198
likelihoods = np .array (results ).squeeze ()
210
- beta , old_beta , weights , sj = calc_beta (beta , likelihoods , step . threshold )
199
+ beta , old_beta , weights , sj = calc_beta (beta , likelihoods , threshold )
211
200
model .marginal_likelihood *= sj
212
201
# resample based on plausibility weights (selection)
213
202
resampling_indexes = np .random .choice (np .arange (draws ), size = draws , p = weights )
@@ -220,31 +209,39 @@ def sample_smc(
220
209
221
210
# compute scaling (optional) and number of Markov chains steps (optional), based on the
222
211
# acceptance rate of the previous stage
223
- if (step .tune_scaling or step .tune_steps ) and stage > 0 :
224
- _tune (acc_rate , proposed , step )
212
+ if (tune_scaling or tune_steps ) and stage > 0 :
213
+ scaling , n_steps = _tune (
214
+ acc_rate ,
215
+ proposed ,
216
+ tune_scaling ,
217
+ tune_steps ,
218
+ scaling ,
219
+ n_steps ,
220
+ max_steps ,
221
+ p_acc_rate ,
222
+ )
225
223
226
- pm ._log .info ("Stage: {:d} Beta: {:.3f} Steps: {:d}" .format (stage , beta , step . n_steps ))
224
+ pm ._log .info ("Stage: {:d} Beta: {:.3f} Steps: {:d}" .format (stage , beta , n_steps ))
227
225
# Apply Metropolis kernel (mutation)
228
- proposed = draws * step . n_steps
226
+ proposed = draws * n_steps
229
227
priors = np .array ([prior_logp (sample ) for sample in posterior ]).squeeze ()
230
228
tempered_logp = priors + likelihoods * beta
231
- deltas = np .squeeze (proposal (step . n_steps ) * step . scaling )
229
+ deltas = np .squeeze (proposal (n_steps ) * scaling )
232
230
233
231
parameters = (
234
232
proposal ,
235
- step . scaling ,
233
+ scaling ,
236
234
accepted ,
237
235
any_discrete ,
238
236
all_discrete ,
239
237
discrete ,
240
- step . n_steps ,
238
+ n_steps ,
241
239
prior_logp ,
242
240
likelihood_logp ,
243
241
beta ,
244
- step .ABC ,
245
242
)
246
243
247
- if step . parallel and cores > 1 :
244
+ if parallel and cores > 1 :
248
245
pool = mp .Pool (processes = cores )
249
246
results = pool .starmap (
250
247
metrop_kernel ,
0 commit comments