Collect sampler warnings only through stats #6192

Merged · 3 commits · Oct 28, 2022

1 change: 1 addition & 0 deletions pymc/__init__.py
@@ -72,6 +72,7 @@ def __set_compiler_flags():
 from pymc.stats import *
 from pymc.step_methods import *
 from pymc.tuning import *
+from pymc.util import drop_warning_stat
 from pymc.variational import *
 from pymc.vartypes import *

7 changes: 0 additions & 7 deletions pymc/backends/base.py
@@ -78,10 +78,6 @@ def __init__(self, name, model=None, vars=None, test_point=None):
         self.chain = None
         self._is_base_setup = False
         self.sampler_vars = None
-        self._warnings = []
-
-    def _add_warnings(self, warnings):
-        self._warnings.extend(warnings)

     # Sampling methods

@@ -288,9 +284,6 @@ def __init__(self, straces):
             self._straces[strace.chain] = strace

         self._report = SamplerReport()
-        for strace in straces:
-            if hasattr(strace, "_warnings"):
-                self._report._add_warnings(strace._warnings, strace.chain)

     def __repr__(self):
         template = "<{}: {} chains, {} iterations, {} variables>"

52 changes: 21 additions & 31 deletions pymc/parallel_sampling.py
@@ -40,12 +40,9 @@


 class ParallelSamplingError(Exception):
-    def __init__(self, message, chain, warnings=None):
+    def __init__(self, message, chain):
         super().__init__(message)
-        if warnings is None:
-            warnings = []
         self._chain = chain
-        self._warnings = warnings


 # Taken from https://hg.python.org/cpython/rev/c4f92b597074
@@ -74,8 +71,8 @@ def rebuild_exc(exc, tb):


 # Messages
-# ('writing_done', is_last, sample_idx, tuning, stats, warns)
-# ('error', warnings, *exception_info)
+# ('writing_done', is_last, sample_idx, tuning, stats)
+# ('error', *exception_info)

 # ('abort', reason)
 # ('write_next',)
@@ -133,7 +130,7 @@ def run(self):
             e = ExceptionWithTraceback(e, e.__traceback__)
             # Send is not blocking so we have to force a wait for the abort
             # message
-            self._msg_pipe.send(("error", None, e))
+            self._msg_pipe.send(("error", e))
             self._wait_for_abortion()
         finally:
             self._msg_pipe.close()
@@ -181,9 +178,8 @@ def _start_loop(self):
                 try:
                     point, stats = self._compute_point()
                 except SamplingError as e:
-                    warns = self._collect_warnings()
                     e = ExceptionWithTraceback(e, e.__traceback__)
-                    self._msg_pipe.send(("error", warns, e))
+                    self._msg_pipe.send(("error", e))
             else:
                 return

@@ -193,11 +189,7 @@ def _start_loop(self):
             elif msg[0] == "write_next":
                 self._write_point(point)
                 is_last = draw + 1 == self._draws + self._tune
-                if is_last:
-                    warns = self._collect_warnings()
-                else:
-                    warns = None
-                self._msg_pipe.send(("writing_done", is_last, draw, tuning, stats, warns))
+                self._msg_pipe.send(("writing_done", is_last, draw, tuning, stats))
                 draw += 1
             else:
                 raise ValueError("Unknown message " + msg[0])
@@ -210,12 +202,6 @@ def _compute_point(self):
             stats = None
         return point, stats

-    def _collect_warnings(self):
-        if hasattr(self._step_method, "warnings"):
-            return self._step_method.warnings()
-        else:
-            return []
-

 def _run_process(*args):
     _Process(*args).run()
@@ -308,11 +294,13 @@ def _send(self, msg, *args):
        except Exception:
            pass
        if message is not None and message[0] == "error":
-            warns, old_error = message[1:]
-            if warns is not None:
-                error = ParallelSamplingError(str(old_error), self.chain, warns)
+            old_error = message[1]
+            if old_error is not None:
+                error = ParallelSamplingError(
+                    f"Chain {self.chain} failed with: {old_error}", self.chain
+                )
            else:
-                error = RuntimeError("Chain %s failed." % self.chain)
+                error = RuntimeError(f"Chain {self.chain} failed.")
            raise error from old_error
        raise
@@ -345,11 +333,13 @@ def recv_draw(processes, timeout=3600):
         msg = ready[0].recv()

         if msg[0] == "error":
-            warns, old_error = msg[1:]
-            if warns is not None:
-                error = ParallelSamplingError(str(old_error), proc.chain, warns)
+            old_error = msg[1]
+            if old_error is not None:
+                error = ParallelSamplingError(
+                    f"Chain {proc.chain} failed with: {old_error}", proc.chain
+                )
             else:
-                error = RuntimeError("Chain %s failed." % proc.chain)
+                error = RuntimeError(f"Chain {proc.chain} failed.")
             raise error from old_error
         elif msg[0] == "writing_done":
             proc._readable = True
@@ -383,7 +373,7 @@ def terminate_all(processes, patience=2):
             process.join()


-Draw = namedtuple("Draw", ["chain", "is_last", "draw_idx", "tuning", "stats", "point", "warnings"])
+Draw = namedtuple("Draw", ["chain", "is_last", "draw_idx", "tuning", "stats", "point"])


 class ParallelSampler:
@@ -466,7 +456,7 @@ def __iter__(self):

         while self._active:
             draw = ProcessAdapter.recv_draw(self._active)
-            proc, is_last, draw, tuning, stats, warns = draw
+            proc, is_last, draw, tuning, stats = draw
             self._total_draws += 1
             if not tuning and stats and stats[0].get("diverging"):
                 self._divergences += 1
@@ -491,7 +481,7 @@ def __iter__(self):
             if not is_last:
                 proc.write_next()

-            yield Draw(proc.chain, is_last, draw, tuning, stats, point, warns)
+            yield Draw(proc.chain, is_last, draw, tuning, stats, point)

     def __enter__(self):
         self._in_context = True

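Note: with the `warnings` field removed, the worker-to-parent protocol carries warnings only inside `stats`. A hand-built sketch of the new `Draw` payload (all values below are illustrative, not produced by an actual sampler):

```python
from collections import namedtuple

# Mirrors the namedtuple defined above; warnings now ride inside `stats`.
Draw = namedtuple("Draw", ["chain", "is_last", "draw_idx", "tuning", "stats", "point"])

draw = Draw(
    chain=0,
    is_last=False,
    draw_idx=17,
    tuning=True,
    stats=[{"diverging": False}],  # a "warning" entry may appear in these dicts
    point={"x": 0.1},
)
```
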
51 changes: 35 additions & 16 deletions pymc/sampling.py
@@ -70,12 +70,13 @@
 )
 from pymc.model import Model, modelcontext
 from pymc.parallel_sampling import Draw, _cpu_count
-from pymc.stats.convergence import run_convergence_checks
+from pymc.stats.convergence import SamplerWarning, log_warning, run_convergence_checks
 from pymc.step_methods import NUTS, CompoundStep, DEMetropolis
 from pymc.step_methods.arraystep import BlockedStep, PopulationArrayStepShared
 from pymc.step_methods.hmc import quadpotential
 from pymc.util import (
     dataset_to_point_list,
+    drop_warning_stat,
     get_default_varnames,
     get_untransformed_name,
     is_transformed_name,
@@ -323,6 +324,7 @@ def sample(
     jitter_max_retries: int = 10,
     *,
     return_inferencedata: bool = True,
+    keep_warning_stat: bool = False,
     idata_kwargs: dict = None,
     mp_ctx=None,
     **kwargs,
@@ -393,6 +395,13 @@
         `MultiTrace` (False). Defaults to `True`.
     idata_kwargs : dict, optional
         Keyword arguments for :func:`pymc.to_inference_data`
+    keep_warning_stat : bool
+        If ``True`` the "warning" stat emitted by, for example, HMC samplers will be kept
+        in the returned ``idata.sample_stats`` group.
+        This leads to the ``idata`` not supporting ``.to_netcdf()`` or ``.to_zarr()`` and
+        should only be set to ``True`` if you intend to use the "warning" objects right away.
+        Defaults to ``False`` such that ``pm.drop_warning_stat`` is applied automatically,
+        making the ``InferenceData`` compatible with saving.
     mp_ctx : multiprocessing.context.BaseContext
         A multiprocessing context for parallel sampling.
         See multiprocessing documentation for details.
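Note: a minimal sketch of how the new flag is meant to be used (the model and the filename are placeholders):

```python
import pymc as pm

with pm.Model():
    pm.Normal("x")
    idata = pm.sample(keep_warning_stat=True)

# The raw SamplerWarning objects stay available for immediate inspection ...
warning_stat = idata.sample_stats.get("warning")  # present when the sampler emitted any
# ... while dropping the stat afterwards restores serializability:
pm.drop_warning_stat(idata).to_netcdf("trace.nc")
```
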
@@ -699,6 +708,10 @@ def sample(
         mtrace.report._add_warnings(convergence_warnings)

     if return_inferencedata:
+        # By default we drop the "warning" stat which contains `SamplerWarning`
+        # objects that cannot be stored with `.to_netcdf()`.
+        if not keep_warning_stat:
+            return drop_warning_stat(idata)
         return idata
     return mtrace

@@ -1048,32 +1061,26 @@ def _iter_sample(
             if step.generates_stats:
                 point, stats = step.step(point)
                 strace.record(point, stats)
+                log_warning_stats(stats)
                 diverging = i > tune and stats and stats[0].get("diverging")
             else:
                 point = step.step(point)
                 strace.record(point)
             if callback is not None:
-                warns = getattr(step, "warnings", None)
                 callback(
                     trace=strace,
-                    draw=Draw(chain, i == draws, i, i < tune, stats, point, warns),
+                    draw=Draw(chain, i == draws, i, i < tune, stats, point),
                 )

             yield strace, diverging
     except KeyboardInterrupt:
         strace.close()
-        if hasattr(step, "warnings"):
-            warns = step.warnings()
-            strace._add_warnings(warns)
         raise
     except BaseException:
         strace.close()
         raise
     else:
         strace.close()
-        if hasattr(step, "warnings"):
-            warns = step.warnings()
-            strace._add_warnings(warns)


 class PopulationStepper:
@@ -1356,6 +1363,7 @@ def _iter_population(
                 if steppers[c].generates_stats:
                     points[c], stats = updates[c]
                     strace.record(points[c], stats)
+                    log_warning_stats(stats)
                 else:
                     points[c] = updates[c]
                     strace.record(points[c])
@@ -1513,21 +1521,16 @@ def _mp_sample(
         with sampler:
             for draw in sampler:
                 strace = traces[draw.chain]
-                if draw.stats is not None:
-                    strace.record(draw.point, draw.stats)
-                else:
-                    strace.record(draw.point)
+                strace.record(draw.point, draw.stats)
+                log_warning_stats(draw.stats)
                 if draw.is_last:
                     strace.close()
-                    if draw.warnings is not None:
-                        strace._add_warnings(draw.warnings)

                 if callback is not None:
                     callback(trace=trace, draw=draw)

     except ps.ParallelSamplingError as error:
         strace = traces[error._chain]
-        strace._add_warnings(error._warnings)
         for strace in traces:
             strace.close()
@@ -1546,6 +1549,22 @@ def _mp_sample(
             strace.close()


+def log_warning_stats(stats: Sequence[Dict[str, Any]]):
+    """Logs 'warning' stats if present."""
+    if stats is None:
+        return
+
+    for sts in stats:
+        warn = sts.get("warning", None)
+        if warn is None:
+            continue
+        if isinstance(warn, SamplerWarning):
+            log_warning(warn)
+        else:
+            _log.warning(warn)
+    return
+
+
 def _choose_chains(traces: Sequence[BaseTrace], tune: int) -> Tuple[List[BaseTrace], int]:
     """
     Filter and slice traces such that (n_traces * len(shortest_trace)) is maximized.

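Note: a small sketch exercising the new helper with a hand-built stats list (real step methods such as NUTS attach `SamplerWarning` objects under the "warning" key):

```python
from pymc.sampling import log_warning_stats
from pymc.stats.convergence import SamplerWarning, WarningType

stats = [
    {"diverging": True,
     "warning": SamplerWarning(WarningType.DIVERGENCE, "Divergence after tuning.", "warn")},
    {"diverging": False},  # no "warning" key, so this entry is skipped
]
log_warning_stats(stats)  # routes the SamplerWarning through log_warning
log_warning_stats(None)   # no-op, mirroring the None guard above
```
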
53 changes: 51 additions & 2 deletions pymc/stats/convergence.py
@@ -68,7 +68,7 @@ def run_convergence_checks(idata: arviz.InferenceData, model) -> List[SamplerWarning]:
         warn = SamplerWarning(WarningType.BAD_PARAMS, msg, "info")
         return [warn]

-    warnings = []
+    warnings: List[SamplerWarning] = []
     valid_name = [rv.name for rv in model.free_RVs + model.deterministics]
     varnames = []
     for rv in model.free_RVs:
@@ -104,11 +104,60 @@
         warn = SamplerWarning(WarningType.CONVERGENCE, msg, "error", extra=ess)
         warnings.append(warn)

+    warnings += warn_divergences(idata)
+    warnings += warn_treedepth(idata)
+
     return warnings


+def warn_divergences(idata: arviz.InferenceData) -> List[SamplerWarning]:
+    """Checks sampler stats and creates a list of warnings about divergences."""
+    sampler_stats = idata.get("sample_stats", None)
+    if sampler_stats is None:
+        return []
+
+    diverging = sampler_stats.get("diverging", None)
+    if diverging is None:
+        return []
+
+    # Warn about divergences
+    n_div = int(diverging.sum())
+    if n_div == 0:
+        return []
+    warning = SamplerWarning(
+        WarningType.DIVERGENCES,
+        f"There were {n_div} divergences after tuning. Increase `target_accept` or reparameterize.",
+        "error",
+    )
+    return [warning]
+
+
+def warn_treedepth(idata: arviz.InferenceData) -> List[SamplerWarning]:
+    """Checks sampler stats and creates a list of warnings about tree depth."""
+    sampler_stats = idata.get("sample_stats", None)
+    if sampler_stats is None:
+        return []
+
+    treedepth = sampler_stats.get("tree_depth", None)
+    if treedepth is None:
+        return []
+
+    warnings = []
+    for c in treedepth.chain:
+        if sum(treedepth.sel(chain=c)) / treedepth.sizes["draw"] > 0.05:
+            warnings.append(
+                SamplerWarning(
+                    WarningType.TREEDEPTH,
+                    f"Chain {c} reached the maximum tree depth."
+                    " Increase `max_treedepth`, increase `target_accept` or reparameterize.",
+                    "warn",
+                )
+            )
+    return warnings
+
+
 def log_warning(warn: SamplerWarning):
-    level = _LEVELS[warn.level]
+    level = _LEVELS.get(warn.level, logging.WARNING)
     logger.log(level, warn.message)

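Note: because the new checks read only from the `sample_stats` group, they can be run against any saved trace. A sketch (the file path is a placeholder):

```python
import arviz as az

from pymc.stats.convergence import log_warning, warn_divergences, warn_treedepth

idata = az.from_netcdf("trace.nc")  # placeholder path
for warn in warn_divergences(idata) + warn_treedepth(idata):
    log_warning(warn)
```
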
7 changes: 0 additions & 7 deletions pymc/step_methods/compound.py
@@ -59,13 +59,6 @@ def step(self, point):
             point = method.step(point)
         return point

-    def warnings(self):
-        warns = []
-        for method in self.methods:
-            if hasattr(method, "warnings"):
-                warns.extend(method.warnings())
-        return warns
-
     def stop_tuning(self):
         for method in self.methods:
             method.stop_tuning()

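Note: taken together, the removals above replace the old `step.warnings()` / `strace._add_warnings()` plumbing with a single convention: a step method that wants to report something attaches a `SamplerWarning` under the "warning" key of its stats dicts, and `log_warning_stats` picks it up. A hypothetical sketch of what a step method's `step()` might now return (`_astep` and the trigger condition are illustrative, not an actual pymc step method):

```python
from pymc.stats.convergence import SamplerWarning, WarningType

def step(self, point):
    point, stats = self._astep(point)  # hypothetical inner step
    if stats[0].get("diverging"):      # illustrative trigger
        stats[0]["warning"] = SamplerWarning(
            WarningType.DIVERGENCE, "Divergence encountered.", "warn"
        )
    return point, stats
```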