Skip to content

Commit c412736

Browse files
dfmeigenfoo
andcommitted
[WIP] Adding support for adaptation of a dense mass matrix based on sample covariances (#3596)
* switching travis badges to svg * Adding adaptation for dense mass matrix * Moving shared code to QuadPotentialFull base * adding references to Welford's algorithm * adding a few more tests for full_adapt * add test for sampling using QuadPotentialFullAdapt * Update pymc3/step_methods/hmc/quadpotential.py Co-Authored-By: George Ho <[email protected]>
1 parent a460468 commit c412736

File tree

2 files changed

+309
-18
lines changed

2 files changed

+309
-18
lines changed

pymc3/step_methods/hmc/quadpotential.py

Lines changed: 168 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import warnings
12
import numpy as np
23
from numpy.random import normal
34
import scipy.linalg
@@ -8,7 +9,8 @@
89

910

1011
__all__ = ['quad_potential', 'QuadPotentialDiag', 'QuadPotentialFull',
11-
'QuadPotentialFullInv', 'QuadPotentialDiagAdapt', 'isquadpotential']
12+
'QuadPotentialFullInv', 'QuadPotentialDiagAdapt',
13+
'QuadPotentialFullAdapt', 'isquadpotential']
1214

1315

1416
def quad_potential(C, is_cov):
@@ -416,7 +418,7 @@ def velocity_energy(self, x, v_out):
416418
class QuadPotentialFull(QuadPotential):
417419
"""Basic QuadPotential object for Hamiltonian calculations."""
418420

419-
def __init__(self, A, dtype=None):
421+
def __init__(self, cov, dtype=None):
420422
"""Compute the lower cholesky decomposition of the potential.
421423
422424
Parameters
@@ -427,32 +429,189 @@ def __init__(self, A, dtype=None):
427429
if dtype is None:
428430
dtype = theano.config.floatX
429431
self.dtype = dtype
430-
self.A = A.astype(self.dtype)
431-
self.L = scipy.linalg.cholesky(A, lower=True)
432+
self._cov = np.array(cov, dtype=self.dtype, copy=True)
433+
self._chol = scipy.linalg.cholesky(self._cov, lower=True)
434+
self._n = len(self._cov)
432435

433436
def velocity(self, x, out=None):
434437
"""Compute the current velocity at a position in parameter space."""
435-
return np.dot(self.A, x, out=out)
438+
return np.dot(self._cov, x, out=out)
436439

437440
def random(self):
438441
"""Draw random value from QuadPotential."""
439-
n = floatX(normal(size=self.L.shape[0]))
440-
return scipy.linalg.solve_triangular(self.L.T, n)
442+
vals = np.random.normal(size=self._n).astype(self.dtype)
443+
return scipy.linalg.solve_triangular(self._chol.T, vals,
444+
overwrite_b=True)
441445

442446
def energy(self, x, velocity=None):
443447
"""Compute kinetic energy at a position in parameter space."""
444448
if velocity is None:
445449
velocity = self.velocity(x)
446-
return .5 * x.dot(velocity)
450+
return 0.5 * np.dot(x, velocity)
447451

448452
def velocity_energy(self, x, v_out):
449453
"""Compute velocity and return kinetic energy at a position in parameter space."""
450454
self.velocity(x, out=v_out)
451-
return 0.5 * np.dot(x, v_out)
455+
return self.energy(x, v_out)
452456

453457
__call__ = random
454458

455459

460+
class QuadPotentialFullAdapt(QuadPotentialFull):
461+
"""Adapt a dense mass matrix using the sample covariances
462+
463+
If the parameter ``doubling`` is true, the adaptation window is doubled
464+
every time it is passed. This can lead to better convergence of the mass
465+
matrix estimation.
466+
467+
"""
468+
def __init__(
469+
self,
470+
n,
471+
initial_mean,
472+
initial_cov=None,
473+
initial_weight=0,
474+
adaptation_window=101,
475+
update_window=1,
476+
doubling=True,
477+
dtype=None,
478+
):
479+
warnings.warn("QuadPotentialFullAdapt is an experimental feature")
480+
481+
if initial_cov is not None and initial_cov.ndim != 2:
482+
raise ValueError("Initial covariance must be two-dimensional.")
483+
if initial_mean.ndim != 1:
484+
raise ValueError("Initial mean must be one-dimensional.")
485+
if initial_cov is not None and initial_cov.shape != (n, n):
486+
raise ValueError(
487+
"Wrong shape for initial_cov: expected %s got %s"
488+
% (n, initial_cov.shape)
489+
)
490+
if len(initial_mean) != n:
491+
raise ValueError(
492+
"Wrong shape for initial_mean: expected %s got %s"
493+
% (n, len(initial_mean))
494+
)
495+
496+
if dtype is None:
497+
dtype = theano.config.floatX
498+
499+
if initial_cov is None:
500+
initial_cov = np.eye(n, dtype=dtype)
501+
initial_weight = 1
502+
503+
self.dtype = dtype
504+
self._n = n
505+
self._cov = np.array(initial_cov, dtype=self.dtype, copy=True)
506+
self._chol = scipy.linalg.cholesky(self._cov, lower=True)
507+
self._chol_error = None
508+
self._foreground_cov = _WeightedCovariance(
509+
self._n, initial_mean, initial_cov, initial_weight, self.dtype
510+
)
511+
self._background_cov = _WeightedCovariance(self._n, dtype=self.dtype)
512+
self._n_samples = 0
513+
514+
self._doubling = doubling
515+
self._adaptation_window = int(adaptation_window)
516+
self._update_window = int(update_window)
517+
self._previous_update = 0
518+
519+
def _update_from_weightvar(self, weightvar):
520+
weightvar.current_covariance(out=self._cov)
521+
try:
522+
self._chol = scipy.linalg.cholesky(self._cov, lower=True)
523+
except (scipy.linalg.LinAlgError, ValueError) as error:
524+
self._chol_error = error
525+
526+
def update(self, sample, grad, tune):
527+
if not tune:
528+
return
529+
530+
# Steps since previous update
531+
delta = self._n_samples - self._previous_update
532+
533+
self._foreground_cov.add_sample(sample, weight=1)
534+
self._background_cov.add_sample(sample, weight=1)
535+
536+
# Update the covariance matrix and recompute the Cholesky factorization
537+
# every "update_window" steps
538+
if (delta + 1) % self._update_window == 0:
539+
self._update_from_weightvar(self._foreground_cov)
540+
541+
# Reset the background covariance if we are at the end of the adaptation window.
542+
if delta >= self._adaptation_window:
543+
self._foreground_cov = self._background_cov
544+
self._background_cov = _WeightedCovariance(
545+
self._n, dtype=self.dtype
546+
)
547+
548+
self._previous_update = self._n_samples
549+
if self._doubling:
550+
self._adaptation_window *= 2
551+
552+
self._n_samples += 1
553+
554+
def raise_ok(self, vmap):
555+
if self._chol_error is not None:
556+
raise ValueError("{0}".format(self._chol_error))
557+
558+
559+
class _WeightedCovariance:
560+
"""Online algorithm for computing mean and covariance
561+
562+
This implements the `Welford's algorithm
563+
<https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance>`_ based
564+
on the implementation in `the Stan math library
565+
<https://github.com/stan-dev/math>`_.
566+
567+
"""
568+
569+
def __init__(
570+
self,
571+
nelem,
572+
initial_mean=None,
573+
initial_covariance=None,
574+
initial_weight=0,
575+
dtype="d",
576+
):
577+
self._dtype = dtype
578+
self.n_samples = float(initial_weight)
579+
if initial_mean is None:
580+
self.mean = np.zeros(nelem, dtype="d")
581+
else:
582+
self.mean = np.array(initial_mean, dtype="d", copy=True)
583+
if initial_covariance is None:
584+
self.raw_cov = np.eye(nelem, dtype="d")
585+
else:
586+
self.raw_cov = np.array(initial_covariance, dtype="d", copy=True)
587+
588+
self.raw_cov[:] *= self.n_samples
589+
590+
if self.raw_cov.shape != (nelem, nelem):
591+
raise ValueError("Invalid shape for initial covariance.")
592+
if self.mean.shape != (nelem,):
593+
raise ValueError("Invalid shape for initial mean.")
594+
595+
def add_sample(self, x, weight):
596+
x = np.asarray(x)
597+
self.n_samples += 1
598+
old_diff = x - self.mean
599+
self.mean[:] += old_diff / self.n_samples
600+
new_diff = x - self.mean
601+
self.raw_cov[:] += weight * new_diff[:, None] * old_diff[None, :]
602+
603+
def current_covariance(self, out=None):
604+
if self.n_samples == 0:
605+
raise ValueError("Can not compute covariance without samples.")
606+
if out is not None:
607+
return np.divide(self.raw_cov, self.n_samples - 1, out=out)
608+
else:
609+
return (self.raw_cov / (self.n_samples - 1)).astype(self._dtype)
610+
611+
def current_mean(self):
612+
return np.array(self.mean, dtype=self._dtype)
613+
614+
456615
try:
457616
import sksparse.cholmod as cholmod
458617
chol_available = True

0 commit comments

Comments
 (0)