Specialized numba sum impl

aseyboldt · aseyboldt · commit f66814dc098f · 2022-12-19T23:20:33.000-06:00
diff --git a/pytensor/link/numba/dispatch/elemwise.py b/pytensor/link/numba/dispatch/elemwise.py
@@ -45,7 +45,7 @@
 from pytensor.scalar.basic import add as add_as
 from pytensor.scalar.basic import scalar_maximum
 from pytensor.tensor.elemwise import CAReduce, DimShuffle, Elemwise
-from pytensor.tensor.math import MaxAndArgmax, MulWithoutZeros
+from pytensor.tensor.math import MaxAndArgmax, MulWithoutZeros, Sum
 from pytensor.tensor.special import LogSoftmax, Softmax, SoftmaxGrad
 from pytensor.tensor.type import scalar
 
@@ -649,6 +649,42 @@ def elemwise_wrapper(*inputs):
     return elemwise_wrapper
 
 
+@numba_funcify.register(Sum)
+def numba_funcify_Sum(op, node, **kwargs):
+    """Return a Numba-jitted ``impl_sum(array)`` for the ``Sum`` op ``op``.
+
+    A full reduction calls ``array.sum()``; otherwise ``op.axis`` is summed one
+    axis at a time, since Numba's ``sum`` only accepts a single integer axis.
+    """
+    # TODO: ``op.acc_dtype`` is ignored; no dtype is passed to ``sum`` yet.
+    ndim_input = node.inputs[0].ndim
+    axes = range(ndim_input) if op.axis is None else op.axis
+    # Wrap negative axes and sum the highest axis first so the remaining axis
+    # numbers stay valid; assumes every axis is within range (TODO confirm).
+    axes = sorted({int(axis) % ndim_input for axis in axes}, reverse=True)
+    if len(axes) == ndim_input:
+        @numba_njit
+        def impl_sum(array):
+            return np.asarray(array.sum())
+
+        return impl_sum
+
+    def reduce_axis(inner, axis):
+        @numba_njit
+        def reduced(array):
+            return inner(array).sum(axis)
+
+        return reduced
+
+    @numba_njit
+    def impl_sum(array):
+        return np.asarray(array)
+
+    for axis in axes:
+        impl_sum = reduce_axis(impl_sum, axis)
+    return impl_sum
+
+
 @numba_funcify.register(CAReduce)
 def numba_funcify_CAReduce(op, node, **kwargs):
     axes = op.axis