kyleam
diff --git a/pymc/__init__.py b/pymc/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/pymc/stats.py b/pymc/stats.py
Lines changed: 162 additions & 1 deletion
diff --git a/pymc/tests/test_stats.py b/pymc/tests/test_stats.py
Lines changed: 155 additions & 9 deletions
@@ -6,6 +6,7 @@
 
 from .trace import *
 from .sample import *
+from .stats import summary
 from .step_methods import *
 from .tuning import *
 
 
@@ -1,8 +1,11 @@
 """Utility functions for PyMC"""
 
 import numpy as np
+from .trace import MultiTrace
+import warnings
 
-__all__ = ['autocorr', 'autocov', 'hpd', 'quantiles', 'mc_error']
+
+__all__ = ['autocorr', 'autocov', 'hpd', 'quantiles', 'mc_error', 'summary']
 
 def statfunc(f):
     """
@@ -237,3 +240,161 @@ def quantiles(x, qlist=(2.5, 25, 50, 75, 97.5)):
 
     except IndexError:
         print("Too few elements for quantile calculation")
+
+
+def summary(trace, vars=None, alpha=0.05, start=0, batches=100, roundto=3):
+    """
+    Print summary statistics and posterior quantiles for sampled variables.
+
+    For each selected variable a table of mean, standard deviation, MC
+    error and HPD interval is printed, followed by a table of posterior
+    quantiles. Nothing is returned.
+
+    :Parameters:
+    trace : Trace object
+      Trace containing MCMC sample. A MultiTrace is first collapsed
+      with its ``combined()`` method.
+
+    vars : list of strings
+      Names of the variables to summarize. Defaults to None, in which
+      case every name in ``trace.varnames`` is summarized.
+
+    alpha : float
+      The alpha level for generating posterior intervals. Defaults to
+      0.05.
+
+    start : int
+      Index of the first sample that is included; earlier samples are
+      dropped. Defaults to zero.
+
+    batches : int
+      Forwarded to the MC error calculation. Defaults to 100.
+
+    roundto : int
+      Number of decimal places shown for each statistic. Defaults to 3.
+
+    Variables whose samples have more than two dimensions are skipped
+    with a warning.
+
+    """
+    varnames = trace.varnames if vars is None else vars
+    if isinstance(trace, MultiTrace):
+        trace = trace.combined()
+
+    printers = (_StatSummary(roundto, batches, alpha),
+                _PosteriorQuantileSummary(roundto, alpha))
+
+    for name in varnames:
+        values = trace[name][start:]
+        if values.ndim > 2:
+            # Only scalar and vector variables can be laid out as rows.
+            warnings.warn('Skipping {} (above 1 dimension)'.format(name))
+            continue
+        if values.ndim == 1:
+            # Scalar variable: promote to a single-column 2-d array so
+            # that the printers can always iterate over columns.
+            values = values[:, None]
+
+        print('\n%s:' % name)
+        print(' ')
+
+        for printer in printers:
+            printer.print_output(values)
+
+
+class _Summary(object):
+    """Base class for summary output.
+
+    Subclasses are expected to set ``header_lines`` (list of strings),
+    ``spaces`` (column width) and ``value_line`` (format template for one
+    row) in their constructor and to implement ``_calculate_values``.
+    """
+    def __init__(self, roundto):
+        self.roundto = roundto
+        self.header_lines = None
+        self.leader = '  '
+        self.spaces = None
+        # Declared here, like header_lines and spaces, so every attribute
+        # read by the methods below exists on the base class.
+        self.value_line = None
+
+    def print_output(self, sample):
+        """Print the header and value rows for `sample`, then a blank line."""
+        print('\n'.join(self._get_lines(sample)) + '\n')
+
+    def _get_lines(self, sample):
+        """Yield every output line, each prefixed with ``self.leader``."""
+        for line in self.header_lines:
+            yield self.leader + line
+        summary_lines = self._calculate_values(sample)
+        for line in self._create_value_output(summary_lines):
+            yield self.leader + line
+
+    def _create_value_output(self, lines):
+        """Yield one formatted row per dict in `lines`.
+
+        Each dict is formatted in place by ``_format_values`` and then
+        substituted into ``self.value_line``; trailing padding is stripped.
+        """
+        for values in lines:
+            self._format_values(values)
+            yield self.value_line.format(pad=self.spaces, **values).strip()
+
+    def _calculate_values(self, sample):
+        """Return an iterable of dicts, one per output row (subclass hook)."""
+        raise NotImplementedError
+
+    def _format_values(self, summary_values):
+        """Replace each value, in place, by a string with `roundto` decimals."""
+        # Iterate over a snapshot of the keys: entries are reassigned
+        # inside the loop.
+        for key in list(summary_values):
+            summary_values[key] = '{:.{ndec}f}'.format(
+                float(summary_values[key]), ndec=self.roundto)
+
+
+class _StatSummary(_Summary):
+    """Summary table with mean, SD, MC error and HPD interval columns."""
+    def __init__(self, roundto, batches, alpha):
+        super(_StatSummary, self).__init__(roundto)
+        spaces = 17
+        # Format the level with 'g' rather than int(): 100 * (1 - alpha)
+        # can fall just below a whole number in floating point, which
+        # int() would truncate to the wrong percentage, and a level such
+        # as 99.9 would be cut to 99.
+        hpd_name = '{:g}% HPD interval'.format(100 * (1 - alpha))
+        value_line = '{mean:<{pad}}{sd:<{pad}}{mce:<{pad}}{hpd:<{pad}}'
+        header = value_line.format(mean='Mean', sd='SD', mce='MC Error',
+                                  hpd=hpd_name, pad=spaces).strip()
+        hline = '-' * len(header)
+
+        self.header_lines = [header, hline]
+        self.spaces = spaces
+        self.value_line = value_line
+        self.batches = batches
+        self.alpha = alpha
+
+    def _calculate_values(self, sample):
+        """Return the per-column statistics produced by ``_calculate_stats``."""
+        return _calculate_stats(sample, self.batches, self.alpha)
+
+    def _format_values(self, summary_values):
+        """Format values in place; the 'hpd' pair becomes '[lower, upper]'."""
+        roundto = self.roundto
+        # Iterate over a snapshot of the keys: entries are reassigned
+        # inside the loop.
+        for key in list(summary_values):
+            val = summary_values[key]
+            if key == 'hpd':
+                summary_values[key] = '[{:.{ndec}f}, {:.{ndec}f}]'.format(
+                    *val, ndec=roundto)
+            else:
+                summary_values[key] = '{:.{ndec}f}'.format(
+                    float(val), ndec=roundto)
+
+
+class _PosteriorQuantileSummary(_Summary):
+    """Summary table of posterior quantiles, one row per sample column."""
+    def __init__(self, roundto, alpha):
+        super(_PosteriorQuantileSummary, self).__init__(roundto)
+        pad = 15
+        # Outer quantiles (in percent) derived from alpha; the three
+        # inner ones are fixed at 25, 50 and 75.
+        lower = 100 * alpha / 2
+        upper = 100 * (1. - alpha / 2)
+
+        row_template = '{lo:<{pad}}{q25:<{pad}}{q50:<{pad}}{q75:<{pad}}{hi:<{pad}}'
+        labels = row_template.format(lo=lower, q25=25, q50=50, q75=75,
+                                     hi=upper, pad=pad).strip()
+        # Rule drawn under the labels: thin outer segments, thick inner
+        # segments, each one character narrower than a column.
+        segment = pad - 1
+        rule = '|{thin}|{thick}|{thick}|{thin}|'.format(
+            thin='-' * segment, thick='=' * segment)
+
+        self.header_lines = ['Posterior quantiles:', labels, rule]
+        self.spaces = pad
+        self.lo, self.hi = lower, upper
+        self.qlist = (lower, 25, 50, 75, upper)
+        self.value_line = row_template
+
+    def _calculate_values(self, sample):
+        """Return the per-column quantile dicts for ``self.qlist``."""
+        return _calculate_posterior_quantiles(sample, self.qlist)
+
+
+def _calculate_stats(sample, batches, alpha):
+    """Yield one dict of statistics per column of the 2-d array `sample`.
+
+    Each dict has the keys 'mean', 'sd', 'mce' (from ``mc_error``) and
+    'hpd' (from ``hpd``, converted to a plain list).
+    """
+    per_column = (sample.mean(0), sample.std(0), mc_error(sample, batches))
+    intervals = hpd(sample, alpha)
+    ncols = sample.shape[1]
+    for col in range(ncols):
+        mean, sd, mce = (stat[col] for stat in per_column)
+        bounds = intervals[col].squeeze().tolist()
+        yield {'mean': mean, 'sd': sd, 'mce': mce, 'hpd': bounds}
+
+
+def _calculate_posterior_quantiles(sample, qlist):
+    """Yield one dict of quantile values per column of the 2-d `sample`.
+
+    The first entry of `qlist` is stored under 'lo', the last under 'hi'
+    and every other entry q under 'q<q>' (for example 'q25').
+    """
+    var_quantiles = quantiles(sample, qlist=qlist)
+    ## Label by position rather than by value: if an end of qlist equals
+    ## an interior quantile (e.g. qlist == (25, 25, 50, 75, 75)), a
+    ## value-keyed mapping would lose the 'q25'/'q75' labels.
+    last = len(qlist) - 1
+    labelled = []
+    for position, q in enumerate(qlist):
+        if position == last:
+            label = 'hi'
+        elif position == 0:
+            label = 'lo'
+        else:
+            label = 'q{}'.format(q)
+        labelled.append((label, q))
+    for index in range(sample.shape[1]):
+        yield {label: var_quantiles[q][index] for label, q in labelled}
@@ -1,44 +1,190 @@
-from ..stats import *
+import pymc as pm
+from pymc import stats
+import numpy as np
 from numpy.random import random, normal, seed
 from numpy.testing import assert_equal, assert_almost_equal, assert_array_almost_equal
+import warnings
+import nose
 
 seed(111)
 normal_sample = normal(0, 1, 1000000)
 
 def test_autocorr():
     """Test autocorrelation and autocovariance functions"""
 
+    # normal_sample holds independent draws, so the estimated
+    # autocorrelation is expected to be ~0 (checked to 2 decimals).
-    assert_almost_equal(autocorr(normal_sample), 0, 2)
+    assert_almost_equal(stats.autocorr(normal_sample), 0, 2)
 
+    # Averaging adjacent draws makes consecutive values of y share one
+    # term; the test expects an autocorrelation of ~0.5 for that series.
     y = [(normal_sample[i-1] + normal_sample[i])/2 for i in range(1, len(normal_sample))]
-    assert_almost_equal(autocorr(y), 0.5, 2)
+    assert_almost_equal(stats.autocorr(y), 0.5, 2)
 
 def test_hpd():
     """Test HPD calculation"""
 
+    # normal_sample is drawn from normal(0, 1); hpd is called with its
+    # default alpha and is expected to return about [-1.96, 1.96].
-    interval = hpd(normal_sample)
+    interval = stats.hpd(normal_sample)
 
     assert_array_almost_equal(interval, [-1.96, 1.96], 2)
 
 def test_make_indices():
     """Test make_indices function"""
 
-    from ..stats import make_indices
-
+    # Expected index tuples for shape (2, 3), last index varying fastest.
     ind = [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
 
-    assert_equal(ind, make_indices((2, 3)))
+    assert_equal(ind, stats.make_indices((2, 3)))
 
 def test_mc_error():
     """Test batch standard deviation function"""
 
+    # 100000 draws from numpy.random.random; mc_error is called with its
+    # default number of batches and must stay below the 0.0025 bound.
     x = random(100000)
 
-    assert(mc_error(x) < 0.0025)
+    assert(stats.mc_error(x) < 0.0025)
 
 def test_quantiles():
     """Test quantiles function"""
 
+    # Uses the default qlist; q exposes .values(), which are sorted
+    # before being compared with the expected standard-normal quantiles.
-    q = quantiles(normal_sample)
+    q = stats.quantiles(normal_sample)
 
     assert_array_almost_equal(sorted(q.values()), [-1.96, -0.67, 0, 0.67, 1.96], 2)
+
+
+def test_summary_1_value_model():
+    """Smoke test: summary() runs on a trace of a single scalar variable."""
+    mu, tau = -2.1, 1.3
+    with pm.Model() as model:
+        pm.Normal('x', mu, tau, testval=.1)
+        sampler = pm.Metropolis(model.vars, np.diag([1.]))
+        samples = pm.sample(100, step=sampler)
+    stats.summary(samples)
+
+
+def test_summary_2_value_model():
+    """Smoke test: summary() runs on a trace of one two-element variable."""
+    mu, tau = -2.1, 1.3
+    with pm.Model() as model:
+        pm.Normal('x', mu, tau, shape=2, testval=[.1, .1])
+        # NOTE(review): the matrix passed to Metropolis is 1x1 although x
+        # has two elements -- confirm this is what Metropolis expects.
+        sampler = pm.Metropolis(model.vars, np.diag([1.]))
+        samples = pm.sample(100, step=sampler)
+    stats.summary(samples)
+
+
+def test_summary_2dim_value_model():
+    """summary() skips a 2x2 variable and emits exactly one warning."""
+    mu = -2.1
+    tau = 1.3
+    with pm.Model() as model:
+        x = pm.Normal('x', mu, tau, shape=(2, 2),
+                   testval=np.tile(.1, (2, 2)))
+        step = pm.Metropolis(model.vars, np.diag([1.]))
+        trace = pm.sample(100, step=step)
+
+    with warnings.catch_warnings(record=True) as wrn:
+        # Record every warning: without this, active filters or the
+        # once-per-location registry may suppress the warning, and the
+        # length assertion below would then fail spuriously.
+        warnings.simplefilter('always')
+        stats.summary(trace)
+        assert len(wrn) == 1
+        assert str(wrn[0].message) == 'Skipping x (above 1 dimension)'
+
+
+def test_summary_format_values():
+    """_Summary._format_values renders every value with `roundto` decimals."""
+    summ = stats._Summary(2)
+    values = dict(nodec=1, onedec=1.0, twodec=1.00, threedec=1.000)
+    summ._format_values(values)
+    for formatted in values.values():
+        assert formatted == '1.00'
+
+
+def test_stat_summary_format_hpd_values():
+    """_StatSummary formats 'hpd' as a bracketed pair, other keys as floats."""
+    summ = stats._StatSummary(2, None, 0.05)
+    values = {'nodec': 1, 'hpd': [1, 1]}
+    summ._format_values(values)
+    for name, formatted in values.items():
+        expected = '[1.00, 1.00]' if name == 'hpd' else '1.00'
+        assert formatted == expected
+
+
+@nose.tools.raises(IndexError)
+def test_calculate_stats_variable_size1_not_adjusted():
+    """A 1-d sample (no column axis) makes _calculate_stats raise IndexError."""
+    flat_sample = np.arange(10)
+    # The generator must be consumed for the error to surface.
+    for _ in stats._calculate_stats(flat_sample, 5, 0.05):
+        pass
+
+
+def test_calculate_stats_variable_size1_adjusted():
+    """A single-column 2-d sample yields exactly one row of statistics."""
+    column_sample = np.arange(10)[:, None]
+    rows = list(stats._calculate_stats(column_sample, 5, 0.05))
+    assert len(rows) == 1
+
+def test_calculate_stats_variable_size2():
+    """A 5x2 sample (two columns of five values) yields two rows."""
+    two_columns = np.arange(10).reshape(5, 2)
+    rows = list(stats._calculate_stats(two_columns, 5, 0.05))
+    assert len(rows) == 2
+
+
+@nose.tools.raises(IndexError)
+def test_calculate_pquantiles_variable_size1_not_adjusted():
+    """A 1-d sample makes _calculate_posterior_quantiles raise IndexError."""
+    sample = np.arange(10)
+    ## Quantiles are given in percent; the ends match alpha = 0.05.
+    qlist = (2.5, 25, 50, 75, 97.5)
+    list(stats._calculate_posterior_quantiles(sample,
+                                              qlist))
+
+
+def test_calculate_pquantiles_variable_size1_adjusted():
+    """A single-column 2-d sample yields exactly one row of quantiles."""
+    sample = np.arange(10)[:, None]
+    ## Quantiles are given in percent; the ends match alpha = 0.05.
+    qlist = (2.5, 25, 50, 75, 97.5)
+    result_size = len(list(stats._calculate_posterior_quantiles(sample,
+                                                                qlist)))
+    assert result_size == 1
+
+
+def test_stats_value_line():
+    """Rows from _StatSummary are laid out in fixed-width columns."""
+    summ = stats._StatSummary(1, None, 0.05)
+    rows = [dict(mean=0, sd=1, mce=2, hpd=[4, 4]),
+            dict(mean=5, sd=6, mce=7, hpd=[8, 8])]
+
+    assert list(summ._create_value_output(rows)) == [
+        '0.0              1.0              2.0              [4.0, 4.0]',
+        '5.0              6.0              7.0              [8.0, 8.0]',
+    ]
+
+
+def test_post_quantile_value_line():
+    """Rows from _PosteriorQuantileSummary are laid out in fixed-width columns."""
+    summ = stats._PosteriorQuantileSummary(1, 0.05)
+    rows = [dict(lo=0, q25=1, q50=2, q75=4, hi=5),
+            dict(lo=6, q25=7, q50=8, q75=9, hi=10)]
+
+    assert list(summ._create_value_output(rows)) == [
+        '0.0            1.0            2.0            4.0            5.0',
+        '6.0            7.0            8.0            9.0            10.0',
+    ]
+
+
+def test_stats_output_lines():
+    """Full _StatSummary output (header, rule, rows) for a 5x2 sample."""
+    sample = np.arange(10).reshape(5, 2)
+    summ = stats._StatSummary(1, 5, 0.05)
+
+    lines = list(summ._get_lines(sample))
+
+    assert lines == [
+        '  Mean             SD               MC Error         95% HPD interval',
+        '  -------------------------------------------------------------------',
+        '  4.0              2.8              1.3              [0.0, 8.0]',
+        '  5.0              2.8              1.3              [1.0, 9.0]',
+    ]
+
+
+def test_posterior_quantiles_output_lines():
+    """Full _PosteriorQuantileSummary output (title, labels, rule, rows)."""
+    sample = np.arange(10).reshape(5, 2)
+    summ = stats._PosteriorQuantileSummary(1, 0.05)
+
+    lines = list(summ._get_lines(sample))
+
+    assert lines == [
+        '  Posterior quantiles:',
+        '  2.5            25             50             75             97.5',
+        '  |--------------|==============|==============|--------------|',
+        '  0.0            2.0            4.0            6.0            8.0',
+        '  1.0            3.0            5.0            7.0            9.0',
+    ]