Skip to content

Remove support for "dims on the fly" #6112

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 9 additions & 43 deletions pymc/distributions/distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from aesara import tensor as at
from aesara.compile.builders import OpFromGraph
from aesara.graph import node_rewriter
from aesara.graph.basic import Node, Variable, clone_replace
from aesara.graph.basic import Node, clone_replace
from aesara.graph.rewriting.basic import in2out
from aesara.graph.utils import MetaType
from aesara.tensor.basic import as_tensor_variable
Expand All @@ -42,9 +42,6 @@
from pymc.distributions.shape_utils import (
Dims,
Shape,
StrongDims,
StrongShape,
change_dist_size,
convert_dims,
convert_shape,
convert_size,
Expand Down Expand Up @@ -154,35 +151,6 @@ def fn(*args, **kwargs):
return fn


def _make_rv_and_resize_shape_from_dims(
*,
cls,
dims: Optional[StrongDims],
model,
observed,
args,
**kwargs,
) -> Tuple[Variable, StrongShape]:
"""Creates the RV, possibly using dims or observed to determine a resize shape (if needed)."""
resize_shape_from_dims = None
size_or_shape = kwargs.get("size") or kwargs.get("shape")

# Preference is given to size or shape. If not specified, we rely on dims and
# finally, observed, to determine the shape of the variable. Because dims can be
# specified on the fly, we need a two-step process where we first create the RV
# without dims information and then resize it.
if not size_or_shape and observed is not None:
kwargs["shape"] = tuple(observed.shape)

# Create the RV without dims information
rv_out = cls.dist(*args, **kwargs)

if not size_or_shape and dims is not None:
resize_shape_from_dims = shape_from_dims(dims, tuple(rv_out.shape), model)

return rv_out, resize_shape_from_dims


class SymbolicRandomVariable(OpFromGraph):
"""Symbolic Random Variable

Expand Down Expand Up @@ -311,17 +279,15 @@ def __new__(
if observed is not None:
observed = convert_observed_data(observed)

# Create the RV, without taking `dims` into consideration
rv_out, resize_shape_from_dims = _make_rv_and_resize_shape_from_dims(
cls=cls, dims=dims, model=model, observed=observed, args=args, **kwargs
)
# Preference is given to size or shape. If not specified, we rely on dims and
# finally, observed, to determine the shape of the variable.
if not ("size" in kwargs or "shape" in kwargs):
if dims is not None:
kwargs["shape"] = shape_from_dims(dims, model)
elif observed is not None:
kwargs["shape"] = tuple(observed.shape)

# Resize variable based on `dims` information
if resize_shape_from_dims:
resize_size_from_dims = find_size(
shape=resize_shape_from_dims, size=None, ndim_supp=rv_out.owner.op.ndim_supp
)
rv_out = change_dist_size(dist=rv_out, new_size=resize_size_from_dims, expand=False)
rv_out = cls.dist(*args, **kwargs)

rv_out = model.register_rv(
rv_out,
Expand Down
21 changes: 6 additions & 15 deletions pymc/distributions/shape_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,17 +480,13 @@ def convert_size(size: Size) -> Optional[StrongSize]:
return size


def shape_from_dims(
dims: StrongDims, shape_implied: Sequence[TensorVariable], model
) -> StrongShape:
def shape_from_dims(dims: StrongDims, model) -> StrongShape:
"""Determines shape from a `dims` tuple.

Parameters
----------
dims : array-like
A vector of dimension names or None.
shape_implied : tensor_like of int
Shape of RV implied from its inputs alone.
model : pm.Model
The current model on stack.

Expand All @@ -499,20 +495,15 @@ def shape_from_dims(
dims : tuple of (str or None)
Names or None for all RV dimensions.
"""
ndim_resize = len(dims) - len(shape_implied)

# Dims must be known already or be inferrable from implied dimensions of the RV
unknowndim_resize_dims = set(dims[:ndim_resize]) - set(model.dim_lengths)
if unknowndim_resize_dims:
# Dims must be known already
unknowndim_dims = set(dims) - set(model.dim_lengths)
if unknowndim_dims:
raise KeyError(
f"Dimensions {unknowndim_resize_dims} are unknown to the model and cannot be used to specify a `size`."
f"Dimensions {unknowndim_dims} are unknown to the model and cannot be used to specify a `shape`."
)

# The numeric/symbolic resize tuple can be created using model.RV_dim_lengths
return tuple(
model.dim_lengths[dname] if dname in model.dim_lengths else shape_implied[i]
for i, dname in enumerate(dims)
)
return tuple(model.dim_lengths[dname] for dname in dims)


def find_size(
Expand Down
39 changes: 8 additions & 31 deletions pymc/tests/distributions/test_shape_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,39 +312,16 @@ def test_simultaneous_dims_and_observed(self):
assert pmodel.RV_dims["y"] == ("ddata",)
assert y.eval().shape == (3,)

def test_define_dims_on_the_fly(self):
def test_define_dims_on_the_fly_raises(self):
# Check that trying to use dims that are not pre-specified fails, even if their
# length could be inferred from the shape of the variables
msg = "Dimensions {'patient'} are unknown to the model"
with pm.Model() as pmodel:
agedata = aesara.shared(np.array([10, 20, 30]))
with pytest.raises(KeyError, match=msg):
pm.Normal("x", [0, 1, 2], dims=("patient",))

# Associate the "patient" dim with an implied dimension
age = pm.Normal("age", agedata, dims=("patient",))
assert "patient" in pmodel.dim_lengths
assert pmodel.dim_lengths["patient"].eval() == 3

# Use the dim to replicate a new RV
effect = pm.Normal("effect", 0, dims=("patient",))
assert effect.ndim == 1
assert effect.eval().shape == (3,)

# Now change the length of the implied dimension
agedata.set_value([1, 2, 3, 4])
# The change should propagate all the way through
assert effect.eval().shape == (4,)

def test_define_dims_on_the_fly_from_observed(self):
with pm.Model() as pmodel:
data = aesara.shared(np.zeros((4, 5)))
x = pm.Normal("x", observed=data, dims=("patient", "trials"))
assert pmodel.dim_lengths["patient"].eval() == 4
assert pmodel.dim_lengths["trials"].eval() == 5

# Use dim to create a new RV
x_noisy = pm.Normal("x_noisy", 0, dims=("patient", "trials"))
assert x_noisy.eval().shape == (4, 5)

# Change data patient dims
data.set_value(np.zeros((10, 6)))
assert x_noisy.eval().shape == (10, 6)
with pytest.raises(KeyError, match=msg):
pm.Normal("x", observed=[0, 1, 2], dims=("patient",))

def test_can_resize_data_defined_size(self):
with pm.Model() as pmodel:
Expand Down
15 changes: 0 additions & 15 deletions pymc/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import pymc as pm

from pymc.aesaraf import GeneratorOp, floatX
from pymc.exceptions import ShapeError
from pymc.tests.helpers import SeededTest, select_by_precision


Expand Down Expand Up @@ -371,20 +370,6 @@ def test_symbolic_coords(self):
assert pmodel.dim_lengths["row"].eval() == 4
assert pmodel.dim_lengths["column"].eval() == 5

def test_no_resize_of_implied_dimensions(self):
with pm.Model() as pmodel:
# Imply a dimension through RV params
pm.Normal("n", mu=[1, 2, 3], dims="city")
# _Use_ the dimension for a data variable
inhabitants = pm.MutableData("inhabitants", [100, 200, 300], dims="city")

# Attempting to re-size the dimension through the data variable would
# cause shape problems in InferenceData conversion, because the RV remains (3,).
with pytest.raises(
ShapeError, match="was initialized from 'n' which is not a shared variable"
):
pmodel.set_data("inhabitants", [1, 2, 3, 4])

def test_implicit_coords_series(self):
pd = pytest.importorskip("pandas")
ser_sales = pd.Series(
Expand Down
18 changes: 15 additions & 3 deletions pymc/tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,7 @@ def test_shapeerror_from_resize_immutable_dim_from_RV():
Even if the variable being updated is a SharedVariable and has other
dimensions that are mutable.
"""
with pm.Model() as pmodel:
with pm.Model(coords={"fixed": range(3)}) as pmodel:
pm.Normal("a", mu=[1, 2, 3], dims="fixed")
assert isinstance(pmodel.dim_lengths["fixed"], TensorVariable)

Expand All @@ -751,7 +751,8 @@ def test_shapeerror_from_resize_immutable_dim_from_RV():
# This is fine because the "fixed" dim is not resized
pmodel.set_data("m", [[1, 2, 3], [3, 4, 5]])

with pytest.raises(ShapeError, match="was initialized from 'a'"):
msg = "The 'm' variable already had 3 coord values defined for its fixed dimension"
with pytest.raises(ValueError, match=msg):
# Can't work because the "fixed" dimension is linked to a
# TensorVariable with constant shape.
# Note that the new data tries to change both dimensions
Expand Down Expand Up @@ -826,7 +827,7 @@ def test_set_dim():


def test_set_dim_with_coords():
"""Test the concious re-sizing of dims created through add_coord() with coord value."""
"""Test the conscious re-sizing of dims created through add_coord() with coord value."""
with pm.Model() as pmodel:
pmodel.add_coord("mdim", mutable=True, length=2, values=["A", "B"])
a = pm.Normal("a", dims="mdim")
Expand Down Expand Up @@ -904,6 +905,17 @@ def test_set_data_indirect_resize_with_coords():
pmodel.set_data("mdata", [1, 2], coords=dict(mdim=[1, 2, 3]))


def test_set_data_constant_shape_error():
with pm.Model() as pmodel:
x = pm.Normal("x", size=7)
pmodel.add_coord("weekday", length=x.shape[0])
pm.MutableData("y", np.arange(7), dims="weekday")

msg = "because the dimension was initialized from 'x' which is not a shared variable"
with pytest.raises(ShapeError, match=msg):
pmodel.set_data("y", np.arange(10))


def test_model_logpt_deprecation_warning():
with pm.Model() as m:
x = pm.Normal("x", 0, 1, size=2)
Expand Down