Commit cb4ab40

Typify Sparse input variables in JAX linker
1 parent 4ce0b07 commit cb4ab40

File tree

4 files changed (+129, -50 lines)

pytensor/link/jax/dispatch/basic.py (+4, -4)

@@ -87,11 +87,11 @@ def assert_fn(x, *inputs):
 def jnp_safe_copy(x):
     try:
         res = jnp.copy(x)
-    except NotImplementedError:
-        warnings.warn(
-            "`jnp.copy` is not implemented yet. Using the object's `copy` method."
-        )
+    except (NotImplementedError, TypeError):
         if hasattr(x, "copy"):
+            warnings.warn(
+                "`jnp.copy` is not implemented yet. Using the object's `copy` method."
+            )
             res = jnp.array(x.copy())
         else:
             warnings.warn(f"Object has no `copy` method: {x}")
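For context, the reordering means the "not implemented" warning is now only emitted when a fallback `copy` method is actually used, and `TypeError` is caught as well, which in this commit's context presumably covers objects such as BCOO arrays that `jnp.copy` cannot convert. A minimal standalone sketch of the same control flow, using a hypothetical `HasCopy` object that is not part of the commit:

```python
import warnings

import numpy as np
import jax.numpy as jnp


class HasCopy:
    """Hypothetical object that JAX cannot convert directly but that exposes .copy()."""

    def copy(self):
        return np.zeros(3)


def safe_copy(x):
    # Same control flow as the patched jnp_safe_copy: warn only when the
    # fallback `copy` method is actually used, and also catch TypeError.
    try:
        return jnp.copy(x)
    except (NotImplementedError, TypeError):
        if hasattr(x, "copy"):
            warnings.warn("`jnp.copy` failed; using the object's `copy` method.")
            return jnp.array(x.copy())
        warnings.warn(f"Object has no `copy` method: {x}")


print(safe_copy(np.arange(3)))  # plain arrays go straight through jnp.copy
print(safe_copy(HasCopy()))     # expected to hit the except branch and warn
```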

pytensor/link/jax/dispatch/sparse.py (+44, -16)

@@ -1,38 +1,66 @@
 import jax.experimental.sparse as jsp
 from scipy.sparse import spmatrix
 
-from pytensor.graph.basic import Constant
+from pytensor.graph.type import HasDataType
 from pytensor.link.jax.dispatch import jax_funcify, jax_typify
-from pytensor.sparse.basic import Dot, StructuredDot
+from pytensor.sparse.basic import Dot, StructuredDot, Transpose
 from pytensor.sparse.type import SparseTensorType
+from pytensor.tensor import TensorType
 
 
 @jax_typify.register(spmatrix)
 def jax_typify_spmatrix(matrix, dtype=None, **kwargs):
-    # Note: This changes the type of the constants from CSR/CSC to BCOO
-    # We could add BCOO as a PyTensor type but this would only be useful for JAX graphs
-    # and it would break the premise of one graph -> multiple backends.
-    # The same situation happens with RandomGenerators...
     return jsp.BCOO.from_scipy_sparse(matrix)
 
 
+class BCOOType(TensorType, HasDataType):
+    """JAX-compatible BCOO type.
+
+    This type is not exposed to users directly.
+
+    It is introduced by the JIT linker in place of any SparseTensorType input
+    variables used in the original function. Nodes in the function graph will
+    still show the original types as inputs and outputs.
+    """
+
+    def filter(self, data, strict: bool = False, allow_downcast=None):
+        if isinstance(data, jsp.BCOO):
+            return data
+
+        if strict:
+            raise TypeError()
+
+        return jax_typify(data)
+
+
+@jax_typify.register(SparseTensorType)
+def jax_typify_SparseTensorType(type):
+    return BCOOType(
+        dtype=type.dtype,
+        shape=type.shape,
+        name=type.name,
+        broadcastable=type.broadcastable,
+    )
+
+
 @jax_funcify.register(Dot)
 @jax_funcify.register(StructuredDot)
 def jax_funcify_sparse_dot(op, node, **kwargs):
-    for input in node.inputs:
-        if isinstance(input.type, SparseTensorType) and not isinstance(input, Constant):
-            raise NotImplementedError(
-                "JAX sparse dot only implemented for constant sparse inputs"
-            )
-
-    if isinstance(node.outputs[0].type, SparseTensorType):
-        raise NotImplementedError("JAX sparse dot only implemented for dense outputs")
-
     @jsp.sparsify
     def sparse_dot(x, y):
         out = x @ y
-        if isinstance(out, jsp.BCOO):
+        if isinstance(out, jsp.BCOO) and not isinstance(
+            node.outputs[0].type, SparseTensorType
+        ):
             out = out.todense()
         return out
 
     return sparse_dot
+
+
+@jax_funcify.register(Transpose)
+def jax_funcify_sparse_transpose(op, **kwargs):
+    def sparse_transpose(x):
+        return x.T
+
+    return sparse_transpose
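To illustrate what the dispatch layer above builds on, here is a small standalone sketch using plain JAX and SciPy, without PyTensor: a SciPy sparse matrix is converted to a `BCOO` array, exactly as `jax_typify_spmatrix` returns, and under `jsp.sparsify` the matmul accepts the sparse operand. The helper name `sparse_dot` here is illustrative only.

```python
import numpy as np
import scipy.sparse
import jax.experimental.sparse as jsp

# What jax_typify_spmatrix does with a SciPy sparse input: convert it to BCOO.
x_sp = scipy.sparse.random(5, 40, density=0.25, format="csr", dtype="float32")
x_bcoo = jsp.BCOO.from_scipy_sparse(x_sp)


# A sparsified dot in the spirit of jax_funcify_sparse_dot: `@` handles the
# BCOO operand; for sparse @ dense the result is expected to be dense.
@jsp.sparsify
def sparse_dot(x, y):
    return x @ y


y = np.arange(40 * 3, dtype="float32").reshape(40, 3)
out = sparse_dot(x_bcoo, y)
np.testing.assert_allclose(out, x_sp.toarray() @ y, rtol=1e-5)
```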

pytensor/link/jax/linker.py (+11)

@@ -12,6 +12,7 @@ class JAXLinker(JITLinker):
 
     def fgraph_convert(self, fgraph, input_storage, storage_map, **kwargs):
         from pytensor.link.jax.dispatch import jax_funcify
+        from pytensor.sparse.type import SparseTensorType
         from pytensor.tensor.random.type import RandomType
 
         if any(
@@ -23,6 +24,16 @@ def fgraph_convert(self, fgraph, input_storage, storage_map, **kwargs):
                 "Input values should be provided in this format to avoid a conversion overhead."
             )
 
+        if any(
+            isinstance(inp.type, SparseTensorType)
+            and not isinstance(inp, SharedVariable)
+            for inp in fgraph.inputs
+        ):
+            warnings.warn(
+                "SparseTypes are implicitly converted to sparse BCOO arrays in JAX. "
+                "Input values should be provided in this format to avoid a conversion overhead."
+            )
+
         shared_rng_inputs = [
            inp
            for inp in fgraph.inputs
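The practical effect of this check is only a compile-time warning, not an error: explicit (non-shared) sparse inputs are now accepted and implicitly typified to BCOO. A hedged sketch of the user-facing behaviour, mirroring the new `test_sparse_io` test below:

```python
import scipy.sparse
from jax.experimental.sparse import BCOO

import pytensor.sparse as ps
from pytensor import function

x = ps.matrix(format="csr", name="x", dtype="float32")
fn = function([x], x.T, mode="JAX")  # emits the UserWarning added above

x_sp = scipy.sparse.random(5, 40, density=0.25, format="csr", dtype="float32")
fn(x_sp)                          # SciPy input: implicitly converted to BCOO
fn(BCOO.from_scipy_sparse(x_sp))  # BCOO input: avoids the conversion overhead
```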

tests/link/jax/test_sparse.py (+70, -30)

@@ -1,14 +1,49 @@
 import numpy as np
 import pytest
 import scipy.sparse
+from jax.experimental.sparse import BCOO
 
 import pytensor.sparse as ps
 import pytensor.tensor as pt
 from pytensor import function
-from pytensor.graph import FunctionGraph
+from pytensor.graph import Constant, FunctionGraph
+from pytensor.tensor.type import DenseTensorType
 from tests.link.jax.test_basic import compare_jax_and_py
 
 
+def assert_bcoo_arrays_close(a1, a2):
+    assert isinstance(a1, BCOO)
+    assert isinstance(a2, BCOO)
+    np.testing.assert_allclose(a1.todense(), a2.todense())
+
+
+@pytest.mark.parametrize("sparse_type", ("csc", "csr"))
+def test_sparse_io(sparse_type):
+    """Test explicit (non-shared) input and output sparse types in JAX."""
+    sparse_mat = ps.matrix(format=sparse_type, name="csc", dtype="float32")
+    sparse_mat_out = sparse_mat.T
+
+    with pytest.warns(
+        UserWarning,
+        match="SparseTypes are implicitly converted to sparse BCOO arrays",
+    ):
+        fn = function([sparse_mat], sparse_mat_out, mode="JAX")
+
+    sp_sparse_mat = scipy.sparse.random(
+        5, 40, density=0.25, format=sparse_type, dtype="float32"
+    )
+    jx_sparse_mat = BCOO.from_scipy_sparse(sp_sparse_mat)
+
+    sp_res = fn(sp_sparse_mat)
+    jx_res = fn(jx_sparse_mat)
+    assert_bcoo_arrays_close(sp_res, jx_sparse_mat.T)
+    assert_bcoo_arrays_close(jx_res, jx_sparse_mat.T)
+
+    # Chained applications
+    assert_bcoo_arrays_close(fn(fn(sp_sparse_mat)), jx_sparse_mat)
+    assert_bcoo_arrays_close(fn(fn(jx_sparse_mat)), jx_sparse_mat)
+
+
 @pytest.mark.parametrize(
     "op, x_type, y_type",
     [
@@ -19,57 +54,62 @@
         # structured_dot only allows matrix @ matrix
         (ps.structured_dot, pt.matrix, ps.matrix),
         (ps.structured_dot, ps.matrix, pt.matrix),
+        (ps.structured_dot, ps.matrix, ps.matrix),
     ],
 )
-def test_sparse_dot_constant_sparse(x_type, y_type, op):
+@pytest.mark.parametrize("x_constant", (False, True))
+@pytest.mark.parametrize("y_constant", (False, True))
+def test_sparse_dot(x_type, y_type, op, x_constant, y_constant):
     inputs = []
     test_values = []
 
     if x_type is ps.matrix:
-        x_sp = scipy.sparse.random(5, 40, density=0.25, format="csr", dtype="float32")
-        x_pt = ps.as_sparse_variable(x_sp, name="x")
+        x_test = scipy.sparse.random(5, 40, density=0.25, format="csr", dtype="float32")
+        x_pt = ps.as_sparse_variable(x_test, name="x")
     else:
-        x_pt = x_type("x", dtype="float32")
-        if x_pt.ndim == 1:
+        if x_type is pt.vector:
            x_test = np.arange(40, dtype="float32")
         else:
            x_test = np.arange(5 * 40, dtype="float32").reshape(5, 40)
+        x_pt = pt.as_tensor_variable(x_test, name="x")
+    assert isinstance(x_pt, Constant)
+
+    if not x_constant:
+        x_pt = x_pt.type(name="x")
     inputs.append(x_pt)
     test_values.append(x_test)
 
     if y_type is ps.matrix:
-        y_sp = scipy.sparse.random(40, 3, density=0.25, format="csc", dtype="float32")
-        y_pt = ps.as_sparse_variable(y_sp, name="y")
+        y_test = scipy.sparse.random(40, 3, density=0.25, format="csc", dtype="float32")
+        y_pt = ps.as_sparse_variable(y_test, name="y")
     else:
-        y_pt = y_type("y", dtype="float32")
-        if y_pt.ndim == 1:
+        if y_type is pt.vector:
            y_test = np.arange(40, dtype="float32")
         else:
            y_test = np.arange(40 * 3, dtype="float32").reshape(40, 3)
+        y_pt = pt.as_tensor_variable(y_test, name="y")
+    assert isinstance(y_pt, Constant)
+
+    if not y_constant:
+        y_pt = y_pt.type(name="y")
     inputs.append(y_pt)
     test_values.append(y_test)
 
     dot_pt = op(x_pt, y_pt)
     fgraph = FunctionGraph(inputs, [dot_pt])
-    compare_jax_and_py(fgraph, test_values)
-
-
-def test_sparse_dot_non_const_raises():
-    x_pt = pt.vector("x")
-
-    y_sp = scipy.sparse.random(40, 3, density=0.25, format="csc", dtype="float32")
-    y_pt = ps.as_sparse_variable(y_sp, name="y").type()
-
-    out = ps.dot(x_pt, y_pt)
-
-    msg = "JAX sparse dot only implemented for constant sparse inputs"
-
-    with pytest.raises(NotImplementedError, match=msg):
-        function([x_pt, y_pt], out, mode="JAX")
-
-    y_pt_shared = ps.shared(y_sp, name="y")
 
-    out = ps.dot(x_pt, y_pt_shared)
+    def assert_fn(x, y):
+        [x] = x
+        [y] = y
+        if hasattr(x, "todense"):
+            x = x.todense()
+        if hasattr(y, "todense"):
+            y = y.todense()
+        np.testing.assert_allclose(x, y)
 
-    with pytest.raises(NotImplementedError, match=msg):
-        function([x_pt], out, mode="JAX")
+    compare_jax_and_py(
+        fgraph,
+        test_values,
+        must_be_device_array=isinstance(dot_pt.type, DenseTensorType),
+        assert_fn=assert_fn,
+    )
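As a usage note, the capability that the deleted `test_sparse_dot_non_const_raises` used to forbid is exactly what the parametrized test above now exercises: a non-constant sparse operand compiles and runs under the JAX backend. A hedged sketch, with shapes and dtypes chosen to match the test above:

```python
import numpy as np
import scipy.sparse

import pytensor.sparse as ps
import pytensor.tensor as pt
from pytensor import function

x = pt.vector("x", dtype="float32")
y = ps.matrix(format="csc", name="y", dtype="float32")  # symbolic, non-constant

# Previously this raised NotImplementedError; it now compiles (with the
# BCOO conversion warning) and evaluates through the JAX backend.
fn = function([x, y], ps.dot(x, y), mode="JAX")

x_test = np.arange(40, dtype="float32")
y_test = scipy.sparse.random(40, 3, density=0.25, format="csc", dtype="float32")
np.testing.assert_allclose(fn(x_test, y_test), x_test @ y_test.toarray(), rtol=1e-5)
```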
