Make VI compatible with JAX backend #7103


Open · wants to merge 6 commits into base: main
27 changes: 2 additions & 25 deletions pymc/pytensorf.py
@@ -22,7 +22,6 @@
import pytensor.tensor as pt
import scipy.sparse as sps

from pytensor import scalar
from pytensor.compile import Function, Mode, get_mode
from pytensor.compile.builders import OpFromGraph
from pytensor.gradient import grad
@@ -39,7 +38,7 @@
from pytensor.graph.op import Op
from pytensor.scalar.basic import Cast
from pytensor.scan.op import Scan
from pytensor.tensor.basic import _as_tensor_variable
from pytensor.tensor.basic import _as_tensor_variable, tensor_copy
from pytensor.tensor.elemwise import Elemwise
from pytensor.tensor.random.op import RandomVariable
from pytensor.tensor.random.type import RandomType
@@ -412,29 +411,7 @@ def hessian_diag(f, vars=None, negate_output=True):
return empty_gradient


class IdentityOp(scalar.UnaryScalarOp):
@staticmethod
def st_impl(x):
return x

def impl(self, x):
return x

def grad(self, inp, grads):
return grads

def c_code(self, node, name, inp, out, sub):
return f"{out[0]} = {inp[0]};"

def __eq__(self, other):
return isinstance(self, type(other))

def __hash__(self):
return hash(type(self))


scalar_identity = IdentityOp(scalar.upgrade_to_float, name="scalar_identity")
identity = Elemwise(scalar_identity, name="identity")
identity = tensor_copy
Member:

Nitpick: just import it directly in the VI module, no need to define it in pytensorf?

Member Author:

It might be used by someone else, I assume.

ricardoV94 (Member) on Jan 17, 2024:

I don't think so, but even if we keep it we should add a deprecation warning.

Member:

Just do from pytensor... import tensor_copy as identity?
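
A minimal sketch of how those two suggestions could combine (illustrative only, not part of this diff; the module-level `__getattr__` shim and the warning wording are assumptions):

```python
# pymc/pytensorf.py (sketch): keep the old name importable, but warn that it is
# now just pytensor's tensor_copy.
import warnings

from pytensor.tensor.basic import tensor_copy


def __getattr__(name):
    # PEP 562 module-level __getattr__: fires only when `identity` is looked up.
    if name == "identity":
        warnings.warn(
            "pymc.pytensorf.identity is deprecated; use "
            "pytensor.tensor.basic.tensor_copy instead.",
            FutureWarning,
        )
        return tensor_copy
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```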



def make_shared_replacements(point, vars, model):
17 changes: 13 additions & 4 deletions pymc/variational/approximations.py
@@ -93,8 +93,8 @@ def create_shared_params(self, start=None, start_sigma=None):
rho = rho1

return {
"mu": pytensor.shared(pm.floatX(start), "mu"),
"rho": pytensor.shared(pm.floatX(rho), "rho"),
"mu": pytensor.shared(pm.floatX(start), "mu", shape=start.shape),
"rho": pytensor.shared(pm.floatX(rho), "rho", shape=rho.shape),
}

@node_property
@@ -137,7 +137,10 @@ def create_shared_params(self, start=None):
start = self._prepare_start(start)
n = self.ddim
L_tril = np.eye(n)[np.tril_indices(n)].astype(pytensor.config.floatX)
return {"mu": pytensor.shared(start, "mu"), "L_tril": pytensor.shared(L_tril, "L_tril")}
return {
"mu": pytensor.shared(start, "mu", shape=start.shape),
"L_tril": pytensor.shared(L_tril, "L_tril", shape=L_tril.shape),
}

@node_property
def L(self):
@@ -225,7 +228,13 @@ def create_shared_params(self, trace=None, size=None, jitter=1, start=None):
for j in range(len(trace)):
histogram[i] = DictToArrayBijection.map(trace.point(j, t)).data
i += 1
return dict(histogram=pytensor.shared(pm.floatX(histogram), "histogram"))
return dict(
histogram=pytensor.shared(
pm.floatX(histogram),
"histogram",
shape=histogram.shape,
)
)

def _check_trace(self):
trace = self._kwargs.get("trace", None)
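
The shape= arguments added above give each shared parameter a static type shape, which a strict backend like JAX can rely on instead of treating every dimension as unknown. A minimal sketch of the effect (illustrative only; the printed values are what pytensor typically reports for a length-3 array):

```python
import numpy as np
import pytensor

start = np.zeros(3, dtype=pytensor.config.floatX)

mu_dynamic = pytensor.shared(start, "mu")                    # no static shape
mu_static = pytensor.shared(start, "mu", shape=start.shape)  # as in this PR

print(mu_dynamic.type.shape)  # typically (None,): length not fixed in the type
print(mu_static.type.shape)   # (3,)
```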
40 changes: 37 additions & 3 deletions tests/sampling/test_jax.py
@@ -86,7 +86,8 @@ def test_jax_PosDefMatrix():
[
pytest.param(1),
pytest.param(
2, marks=pytest.mark.skipif(len(jax.devices()) < 2, reason="not enough devices")
2,
marks=pytest.mark.skipif(len(jax.devices()) < 2, reason="not enough devices"),
),
],
)
@@ -265,7 +266,11 @@ def test_get_jaxified_logp():
@pytest.fixture(scope="module")
def model_test_idata_kwargs() -> pm.Model:
with pm.Model(
coords={"x_coord": ["a", "b"], "x_coord2": [1, 2], "z_coord": ["apple", "banana", "orange"]}
coords={
"x_coord": ["a", "b"],
"x_coord2": [1, 2],
"z_coord": ["apple", "banana", "orange"],
}
) as m:
x = pm.Normal("x", shape=(2,), dims=["x_coord"])
_ = pm.Normal("y", x, observed=[0, 0])
@@ -372,7 +377,8 @@ def test_get_batched_jittered_initial_points():
[
pytest.param(1),
pytest.param(
2, marks=pytest.mark.skipif(len(jax.devices()) < 2, reason="not enough devices")
2,
marks=pytest.mark.skipif(len(jax.devices()) < 2, reason="not enough devices"),
),
],
)
@@ -536,3 +542,31 @@ def test_dirichlet_multinomial_dims():
frozen_dm = freeze_dims_and_data(m)["dm"]
dm_draws = pm.draw(frozen_dm, mode="JAX")
np.testing.assert_equal(dm_draws, np.eye(3) * 5)


@pytest.mark.parametrize("method", ["advi", "fullrank_advi"])
def test_vi_sampling_jax(method):
with pm.Model() as model:
x = pm.Normal("x")
pm.fit(10, method=method, fn_kwargs=dict(mode="JAX"))
Member:

To be consistent with pm.sample and the nuts_sampler= arg, should we have a dedicated argument for the VI backend instead of kwargs?

Member:

I vote yes, this API looks super weird.

Member:

What looks weird? This is the compilation mode; it would be exactly the same if you wanted to use Numba or JAX for the PyMC NUTS sampler or for prior/posterior predictive.

The only thing I would change is the name fn_kwargs, which I think is called compile_kwargs in those other functions.

Member:

Wouldn't this be what the user would have to do if they wanted to run VI on JAX?

Member:

I don't understand the question; this PR just makes minor tweaks so the PyMC VI module can compile to JAX. It's not linking to specific JAX VI libraries.

Member:

We used this for sample_posterior_predictive for projects just last week, as we were sampling new variables that had heavy matmuls; it went down from hours to minutes.

Great idea, should definitely add it there too.

pm.sample is still useful as you can sample discrete variables with JAX this way.

That makes sense, I'm not opposed to adding it there. Maybe we can add a warning that the sampler is still running Python and they likely will want to use nuts_sampler.

ricardoV94 (Member) on Apr 1, 2024:

This is still doing Python loops; it's exactly the same argument you need for pm.sample.

It's different from linking to a JAX VI library, which is what would be equivalent to the nuts_sampler kwarg that Chris mentioned in the first comment.

Member:

> This is still doing Python loops; it's exactly the same argument you need for pm.sample.

Oh, I somehow assumed that VI was implemented mostly in PyTensor?

Member Author:

As for this, I'd prefer to keep this PR focused on backend compatibility and address possible API changes later in a new issue + PR. Agreed that there is an inconsistency we need to resolve, but handling it here would only delay pushing to main at least some working solution, which has already gone through many issues.

Member:

Agreed @ferrine. My only suggestion is to switch fn_kwargs to compile_kwargs, which we use in the other sample methods.
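
For reference, a minimal end-to-end usage under the API as it currently stands in this PR (mirroring the test above; the iteration and draw counts are arbitrary, and fn_kwargs is the keyword whose rename to compile_kwargs is being discussed):

```python
import pymc as pm

with pm.Model():
    pm.Normal("x")
    # fn_kwargs is passed through to function compilation, selecting the JAX mode.
    approx = pm.fit(1000, method="advi", fn_kwargs=dict(mode="JAX"))

idata = approx.sample(500)  # draw from the fitted approximation
```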



@pytest.mark.xfail(
reason="""
This error happens during the equilibrium rewriter. Probably one of the routines in SVGD is problematic.

TypeError: The broadcast pattern of the output of scan
(Matrix(float64, shape=(?, 1))) is inconsistent with the one provided in `output_info`
(Vector(float64, shape=(?,))). The output on axis 0 is `True`, but it is `False` on axis
ricardoV94 (Member) on Jul 9, 2024:

This actually is something wrong if the number of dimensions of a recurring output differs from that of the initial state. The difference between None and 1 is more annoying, but this one looks like an error.

1 in `output_info`. This can happen if one of the dimension is fixed to 1 in the input,
while it is still variable in the output, or vice-verca. You have to make them consistent,
e.g. using pytensor.tensor.{unbroadcast, specify_broadcastable}.

Instead of fixing this error, it makes sense to rework the internals of the variational module to use pytensor's vectorize instead of scan.
"""
)
def test_vi_sampling_jax_svgd():
with pm.Model():
x = pm.Normal("x")
pm.fit(10, method="svgd", fn_kwargs=dict(mode="JAX"))
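
For context on the quoted error (illustrative only, not a fix for the SVGD failure): the helpers the message points to adjust whether an axis is statically known to have length 1.

```python
import pytensor.tensor as pt

x = pt.matrix("x")                      # static shape (None, None)
x_col = pt.specify_broadcastable(x, 1)  # pin axis 1 as broadcastable: (None, 1)
x_free = pt.unbroadcast(x_col, 1)       # relax it back to (None, None)

print(x.type.shape, x_col.type.shape, x_free.type.shape)
```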