pytensor-54: Properly handle Mul nodes that have more than two factors, some of which may not be exp nodes

tamastokes · ricardoV94 · commit 28fdc862cbe1 · 2023-01-13T17:13:42.000+01:00
diff --git a/pytensor/tensor/rewriting/math.py b/pytensor/tensor/rewriting/math.py
@@ -423,33 +423,47 @@ def local_sumsqr2dot(fgraph, node):
                     return [new_out]
 
 
-@register_canonicalize
 @register_specialize
-@node_rewriter([Elemwise])
+@node_rewriter([mul, true_div])
 def local_mulexp2expadd(fgraph, node):
     """
     This rewrite detects e^x * e^y and converts it to e^(x+y).
     Similarly, e^x / e^y becomes e^(x-y).
     """
-    if (
-        isinstance(node.op, Elemwise)
-        and isinstance(node.op.scalar_op, (aes.Mul, aes.TrueDiv))
-        and node.inputs[0].owner
-        and isinstance(node.inputs[0].owner.op, Elemwise)
-        and isinstance(node.inputs[0].owner.op.scalar_op, aes.Exp)
-        and node.inputs[1].owner
-        and isinstance(node.inputs[1].owner.op, Elemwise)
-        and isinstance(node.inputs[1].owner.op.scalar_op, aes.Exp)
+    if isinstance(node.op, Elemwise) and isinstance(
+        node.op.scalar_op, (aes.Mul, aes.TrueDiv)
     ):
-        input1 = node.inputs[0].owner.inputs[0]
-        input2 = node.inputs[1].owner.inputs[0]
-        if isinstance(node.op.scalar_op, aes.Mul):
-            new_out = exp(input1 + input2)
-        else:  # TrueDiv
-            new_out = exp(input1 - input2)
-        if new_out.dtype != node.outputs[0].dtype:
-            new_out = cast(new_out, dtype=node.outputs[0].dtype)
-        return [new_out]
+        exps = [
+            n.owner.inputs[0]
+            for n in node.inputs
+            if n.owner
+            and hasattr(n.owner.op, "scalar_op")
+            and isinstance(n.owner.op.scalar_op, aes.Exp)
+        ]
+        # The rewrite only applies when at least two of the inputs are exp nodes
+        if len(exps) >= 2:
+            # Mul -> add; TrueDiv -> sub
+            orig_op, new_op = mul, add
+            if isinstance(node.op.scalar_op, aes.TrueDiv):
+                orig_op, new_op = true_div, sub
+            new_out = exp(new_op(*exps))
+            if new_out.dtype != node.outputs[0].dtype:
+                new_out = cast(new_out, dtype=node.outputs[0].dtype)
+            # The original Mul may have more than two factors, some of which may not be exp nodes.
+            # If so, we keep multiplying them with the new exp(sum) node.
+            # E.g.: e^x * y * e^z * w --> e^(x+z) * y * w
+            rest = [
+                n
+                for n in node.inputs
+                if not n.owner
+                or not hasattr(n.owner.op, "scalar_op")
+                or not isinstance(n.owner.op.scalar_op, aes.Exp)
+            ]
+            if len(rest) > 0:
+                new_out = orig_op(new_out, *rest)
+                if new_out.dtype != node.outputs[0].dtype:
+                    new_out = cast(new_out, dtype=node.outputs[0].dtype)
+            return [new_out]
 
 
 @register_stabilize
diff --git a/tests/tensor/rewriting/test_math.py b/tests/tensor/rewriting/test_math.py
@@ -4015,19 +4015,68 @@ def test_local_sumsqr2dot():
 
 
 def test_local_mulexp2expadd():
-    # e^x * e^y = e^(x+y)
-    # test simple scalars first
     x = scalar("x")
     y = scalar("y")
+    z = scalar("z")
+    w = scalar("w")
     expx = exp(x)
     expy = exp(y)
-    expx_expy = expx * expy
-    f = function([x, y], expx_expy)
-    utt.assert_allclose(f(3, 4), np.exp(3 + 4))
+    expz = exp(z)
+    expw = exp(w)
+
+    # e^x * e^y * e^z * e^w = e^(x+y+z+w)
+    op = expx * expy * expz * expw
+    f = function([x, y, z, w], op)
+    utt.assert_allclose(f(3, 4, 5, 6), np.exp(3 + 4 + 5 + 6))
+    graph = f.maker.fgraph.toposort()
+    assert isinstance(graph[0].op, Elemwise)
+    inner_graph = graph[0].op.scalar_op.fgraph.toposort()
+    assert any(isinstance(n.op, aes.Add) for n in inner_graph)
+    assert not any(isinstance(n.op, aes.Mul) for n in inner_graph)
+
+    # e^x * e^y * e^z / e^w = e^(x+y+z-w)
+    op = expx * expy * expz / expw
+    f = function([x, y, z, w], op)
+    utt.assert_allclose(f(3, 4, 5, 6), np.exp(3 + 4 + 5 - 6))
+    graph = f.maker.fgraph.toposort()
+    assert isinstance(graph[0].op, Elemwise)
+    inner_graph = graph[0].op.scalar_op.fgraph.toposort()
+    assert any(isinstance(n.op, aes.Add) for n in inner_graph)
+    assert any(isinstance(n.op, aes.Sub) for n in inner_graph)
+    assert not any(isinstance(n.op, aes.Mul) for n in inner_graph)
+    assert not any(isinstance(n.op, aes.TrueDiv) for n in inner_graph)
+
+    # e^x * e^y / e^z * e^w = e^(x+y-z+w)
+    op = expx * expy / expz * expw
+    f = function([x, y, z, w], op)
+    utt.assert_allclose(f(3, 4, 5, 6), np.exp(3 + 4 - 5 + 6))
+    graph = f.maker.fgraph.toposort()
+    assert isinstance(graph[0].op, Elemwise)
+    inner_graph = graph[0].op.scalar_op.fgraph.toposort()
+    assert any(isinstance(n.op, aes.Add) for n in inner_graph)
+    assert any(isinstance(n.op, aes.Sub) for n in inner_graph)
+    assert not any(isinstance(n.op, aes.Mul) for n in inner_graph)
+    assert not any(isinstance(n.op, aes.TrueDiv) for n in inner_graph)
+
+    # e^x / e^y / e^z = (e^x / e^y) / e^z = e^(x-y-z)
+    op = expx / expy / expz
+    f = function([x, y, z], op)
+    utt.assert_allclose(f(3, 4, 5), np.exp(3 - 4 - 5))
+    graph = f.maker.fgraph.toposort()
+    assert isinstance(graph[0].op, Elemwise)
+    inner_graph = graph[0].op.scalar_op.fgraph.toposort()
+    assert any(isinstance(n.op, aes.Sub) for n in inner_graph)
+    assert not any(isinstance(n.op, aes.TrueDiv) for n in inner_graph)
+
+    # e^x * y * e^z * w = e^(x+z) * y * w
+    op = expx * y * expz * w
+    f = function([x, y, z, w], op)
+    utt.assert_allclose(f(3, 4, 5, 6), np.exp(3 + 5) * 4 * 6)
     graph = f.maker.fgraph.toposort()
     assert isinstance(graph[0].op, Elemwise)
     inner_graph = graph[0].op.scalar_op.fgraph.toposort()
     assert any(isinstance(n.op, aes.Add) for n in inner_graph)
+    assert any(isinstance(n.op, aes.Mul) for n in inner_graph)
 
     # expect same for matrices as well
     mx = matrix("mx")
@@ -4040,28 +4089,20 @@ def test_local_mulexp2expadd():
     assert isinstance(graph[0].op, Elemwise)
     inner_graph = graph[0].op.scalar_op.fgraph.toposort()
     assert any(isinstance(n.op, aes.Add) for n in inner_graph)
+    assert not any(isinstance(n.op, aes.Mul) for n in inner_graph)
 
     # checking whether further rewrites can proceed after this one as one would expect
     # e^x * e^(-x) = e^(x-x) = e^0 = 1
     f = function([x], expx * exp(neg(x)))
-    graph = f.maker.fgraph.toposort()
-    assert isinstance(graph[0].inputs[0], TensorConstant)
     utt.assert_allclose(f(42), 1)
-
-    # e^x / e^y = e^(x-y)
-    expx_div_expy = expx / expy
-    f = function([x, y], expx_div_expy)
-    utt.assert_allclose(f(5, 3), np.exp(5 - 3))
     graph = f.maker.fgraph.toposort()
-    assert isinstance(graph[0].op, Elemwise)
-    inner_graph = graph[0].op.scalar_op.fgraph.toposort()
-    assert any(isinstance(n.op, aes.Sub) for n in inner_graph)
+    assert isinstance(graph[0].inputs[0], TensorConstant)
 
     # e^x / e^x = e^(x-x) = e^0 = 1
     f = function([x], expx / expx)
+    utt.assert_allclose(f(42), 1)
     graph = f.maker.fgraph.toposort()
     assert isinstance(graph[0].inputs[0], TensorConstant)
-    utt.assert_allclose(f(42), 1)
 
 
 def test_local_expm1():