Add benchmark tests for fused Elemwises

ricardoV94 · ricardoV94 · commit 842bc52ad462 · 2023-02-08T14:04:50.000+01:00
diff --git a/tests/link/numba/test_elemwise.py b/tests/link/numba/test_elemwise.py
@@ -11,6 +11,7 @@
 from pytensor import config, function
 from pytensor.compile.ops import deep_copy_op
 from pytensor.compile.sharedvalue import SharedVariable
+from pytensor.gradient import grad
 from pytensor.graph.basic import Constant
 from pytensor.graph.fg import FunctionGraph
 from pytensor.tensor import elemwise as at_elemwise
@@ -555,3 +556,18 @@ def test_logsumexp_benchmark(size, axis, benchmark):
     res = benchmark(X_lse_fn, X_val)
     exp_res = scipy.special.logsumexp(X_val, axis=axis, keepdims=True)
     np.testing.assert_array_almost_equal(res, exp_res)
+
+
+def test_fused_elemwise_benchmark(benchmark):
+    """Benchmark Numba evaluation of ``-((x - mu) ** 2) / 2`` and its gradient wrt ``x``."""
+    n_elements = 100_000
+    generator = np.random.default_rng(123)
+    x = pytensor.shared(generator.normal(size=n_elements), name="x")
+    mu = pytensor.shared(generator.normal(size=n_elements), name="mu")
+    # Both the expression and its gradient are outputs of one compiled function
+    logp = -((x - mu) ** 2) / 2
+    outputs = [logp, grad(logp.sum(), x)]
+    compiled = pytensor.function([], outputs, mode="NUMBA")
+    # Call once up front so that JIT compilation happens before the benchmark runs
+    compiled()
+    benchmark(compiled)
diff --git a/tests/tensor/rewriting/test_elemwise.py b/tests/tensor/rewriting/test_elemwise.py
@@ -9,6 +9,7 @@
 from pytensor.compile.function import function
 from pytensor.compile.mode import Mode, get_default_mode
 from pytensor.configdefaults import config
+from pytensor.gradient import grad
 from pytensor.graph.basic import Constant
 from pytensor.graph.fg import FunctionGraph
 from pytensor.graph.rewriting.basic import check_stack_trace, out2in
@@ -1349,6 +1350,18 @@ def test_multiple_outputs_fused_root_elemwise(self):
         assert len(nodes) == 1
         assert isinstance(nodes[0].op.scalar_op, Composite)
 
+    def test_eval_benchmark(self, benchmark):
+        """Benchmark ``-((x - mu) ** 2) / 2`` and its gradient wrt ``x`` in FAST_RUN mode."""
+        n_elements = 100_000
+        generator = np.random.default_rng(123)
+        x = pytensor.shared(generator.normal(size=n_elements), name="x")
+        mu = pytensor.shared(generator.normal(size=n_elements), name="mu")
+
+        logp = -((x - mu) ** 2) / 2
+        outputs = [logp, grad(logp.sum(), x)]
+        compiled = pytensor.function([], outputs, mode="FAST_RUN")
+        benchmark(compiled)
+
 
 class TimesN(aes.basic.UnaryScalarOp):
     """