Commit e7f33c9

Merge branch 'main' into fix_logging
2 parents 91f9d91 + bd98b5e commit e7f33c9

32 files changed (+1269 / -751 lines)

.github/release.yml

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
# .github/release.yml

changelog:
  exclude:
    labels:
      - ignore-for-release
  categories:
    - title: Breaking Changes 🛠
      labels:
        - Semver-Major
        - breaking-change
    - title: API
      labels:
        - "release notes: api"
    - title: ARM
      labels:
        - "release notes: arm"
    - title: NXP
      labels:
        - "release notes: nxp"
    - title: Exir
      labels:
        - "release notes: exir"
    - title: Misc
      labels:
        - "release notes: misc"
    - title: Apple
      labels:
        - "release notes: apple"
    - title: Build
      labels:
        - "release notes: build"
    - title: Vulkan
      labels:
        - "release notes: vulkan"
    - title: Cadence
      labels:
        - "release notes: cadence"
    - title: Runtime
      labels:
        - "release notes: runtime"
    - title: XNNPACK
      labels:
        - "release notes: xnnpack"
    - title: Devtools
      labels:
        - "release notes: devtools"
    - title: Examples
      labels:
        - "release notes: examples"
    - title: Mediatek
      labels:
        - "release notes: mediatek"
    - title: Openvino
      labels:
        - "release notes: openvino"
    - title: Qualcomm
      labels:
        - "release notes: qualcomm"
    - title: Training
      labels:
        - "release notes: training"
    - title: Quantization
      labels:
        - "release notes: quantization"
    - title: Ops & kernels
      labels:
        - "release notes: ops & kernels"
    - title: Other Changes
      labels:
        - "*"

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@
 from .decompose_batchnorm_pass import DecomposeBatchNormPass  # noqa
 from .decompose_div_pass import DecomposeDivPass  # noqa
 from .decompose_layernorm_pass import DecomposeLayerNormPass  # noqa
+from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass  # noqa
 from .decompose_linear_pass import DecomposeLinearPass  # noqa
 from .decompose_meandim_pass import DecomposeMeanDimPass  # noqa
 from .decompose_select import DecomposeSelectPass  # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 3 additions & 0 deletions
@@ -26,6 +26,7 @@
     DecomposeBatchNormPass,
     DecomposeDivPass,
     DecomposeLayerNormPass,
+    DecomposeLeakyReLUPass,
     DecomposeLinearPass,
     DecomposeMeanDimPass,
     DecomposeSelectPass,
@@ -121,6 +122,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(FuseBatchnorm2DPass(exported_program))
         self.add_pass(ConvertMmToBmmPass())
         self.add_pass(DecomposeLinearPass())
+        self.add_pass(DecomposeLeakyReLUPass())
         self.add_pass(DecomposeBatchNormPass())
         self.add_pass(DecomposeLayerNormPass())
         self.add_pass(DecomposeVarPass())
@@ -178,6 +180,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeVarPass())
         self.add_pass(DecomposeMeanDimPass())
         self.add_pass(DecomposeDivPass())
+        self.add_pass(DecomposeLeakyReLUPass())

         if isinstance(self.tosa_spec, Tosa_0_80) and self.tosa_spec.is_U55_subset:
             # Numerically stable softmax uses amax which is not supported on Ethos-U55

backends/arm/_passes/decompose_leaky_relu_pass.py

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
# Copyright 2025 Arm Limited and/or its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-unsafe

import torch
from executorch.backends.arm._passes import ArmPass
from executorch.exir.dialects._ops import ops as exir_ops

edge_ops = (exir_ops.edge.aten.leaky_relu.default,)
torch_ops = (torch.ops.aten.leaky_relu.default,)


def _get_leaky_relu_ops(op) -> tuple:
    if op in edge_ops:
        return (
            exir_ops.edge.aten.clamp.default,
            exir_ops.edge.aten.full.default,
            exir_ops.edge.aten.mul.Tensor,
            exir_ops.edge.aten.add.Tensor,
        )
    elif op in torch_ops:
        return (
            torch.ops.aten.clamp.default,
            torch.ops.aten.full.default,
            torch.ops.aten.mul.Tensor,
            torch.ops.aten.add.Tensor,
        )
    else:
        raise RuntimeError(f"Can't get decomposition ops for op {op}")


class DecomposeLeakyReLUPass(ArmPass):
    """
    This pass decomposes Leaky ReLU into primitive operations.
    LeakyReLU(x,slope) = max(0,x) + slope * min(0,x)

    Example:
        %op1 = clamp(x,0,None) (equivalent to max(0,x))
        %op2 = clamp(x,None,0) (equivalent to min(0,x))
        %op3 = full(x.shape,slope)
        %op4 = mul(%op3,%op2)
        %op5 = add(%op1,%op4)
    """

    def call_operator(self, op, args, kwargs, meta):
        if op not in (edge_ops + torch_ops):
            return super().call_operator(op, args, kwargs, meta)

        x = args[0]
        slope = args[1] if len(args) > 1 else 0.01
        dtype = x.node.meta["val"].dtype
        clamp, full, mul, add = _get_leaky_relu_ops(op)
        op1 = super().call_operator(
            op=clamp, args=(x, 0, None), kwargs=kwargs, meta=meta
        )
        op2 = super().call_operator(
            op=clamp, args=(x, None, 0), kwargs=kwargs, meta=meta
        )
        op3 = super().call_operator(
            op=full,
            args=(x.node.meta["val"].shape, slope),
            kwargs={"dtype": dtype},
            meta=meta,
        )
        op4 = super().call_operator(op=mul, args=(op3, op2), kwargs=kwargs, meta=meta)
        op5 = super().call_operator(op=add, args=(op1, op4), kwargs=kwargs, meta=meta)
        return op5
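
The identity in the docstring is easy to sanity-check outside the pass with plain PyTorch ops. A minimal standalone sketch (arbitrary test values) that mirrors the clamp/full/mul/add sequence the pass emits:

import torch

x = torch.tensor([-2.0, -0.5, 0.0, 0.5, 2.0])
slope = 0.01

op1 = torch.clamp(x, min=0)                      # max(0, x)
op2 = torch.clamp(x, max=0)                      # min(0, x)
op3 = torch.full(x.shape, slope, dtype=x.dtype)  # slope broadcast to x's shape
decomposed = op1 + op3 * op2                     # mul then add, as in the pass

assert torch.allclose(decomposed, torch.nn.functional.leaky_relu(x, slope))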

backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 8 additions & 2 deletions
@@ -136,8 +136,14 @@ def call(self, graph_module: GraphModule) -> PassResult:
                 continue

             # Make sure we haven't already set qparams meta information on the node
-            assert "input_qparams" not in n.meta.keys()
-            assert "output_qparams" not in n.meta.keys()
+            assert "input_qparams" not in n.meta, (
+                f'Unexpected key "input_qparams" found in meta for node {n}. '
+                "input_qparams should not have been set at this point"
+            )
+            assert "output_qparams" not in n.meta, (
+                f'Unexpected key "output_qparams" found in meta for node {n}. '
+                "output_qparams should not have been set at this point"
+            )

             # for the inputs and outputs search the graph for quantization info and
             # store the information in a dict with order of the _tensor_ inputs as key,

backends/arm/_passes/insert_table_ops.py

Lines changed: 1 addition & 0 deletions
@@ -41,6 +41,7 @@ class TableOps:
     # Targets that follow a straigtforward one-to-one mapping to their table op
     unary_table_ops: Dict[EdgeOpOverload, Callable[[torch.Tensor], torch.Tensor]] = {
         exir_ops.edge.aten.ceil.default: torch.ceil,
+        exir_ops.edge.aten.erf.default: torch.erf,
         exir_ops.edge.aten.exp.default: torch.exp,
         exir_ops.edge.aten.floor.default: torch.floor,
         exir_ops.edge.aten.log.default: torch.log,

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 3 additions & 0 deletions
@@ -166,6 +166,7 @@ def is_node_supported(
             exir_ops.edge.aten.div.Tensor,
             exir_ops.edge.aten.eq.Tensor,
             exir_ops.edge.aten.eq.Scalar,
+            exir_ops.edge.aten.erf.default,
             exir_ops.edge.aten.exp.default,
             exir_ops.edge.aten.log.default,
             exir_ops.edge.aten.linear.default,
@@ -192,6 +193,7 @@
             exir_ops.edge.aten.repeat.default,
             exir_ops.edge.aten.reciprocal.default,
             exir_ops.edge.aten.relu.default,
+            exir_ops.edge.aten.leaky_relu.default,
             exir_ops.edge.aten.rsqrt.default,
             exir_ops.edge.aten._softmax.default,
             exir_ops.edge.aten.select_copy.int,
@@ -257,6 +259,7 @@
             exir_ops.edge.aten.sub.Scalar,
             exir_ops.edge.aten.mul.Scalar,
             exir_ops.edge.aten.div.Scalar,
+            exir_ops.edge.aten.leaky_relu.default,
         ]
         if needs_decomp:
             self.reporter.report_reject(node, "Needs to be decomposed.")

backends/arm/operators/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -19,6 +19,7 @@
     op_constant_pad_nd,
     op_conv2d,
     op_eq,
+    op_erf,
     op_exp,
     op_full,
     op_ge,

backends/arm/operators/op_add.py

Lines changed: 21 additions & 5 deletions
@@ -41,9 +41,18 @@ def define_node(
     ) -> None:
         # Specification (0.80) states that input and output types
         # should all be the same
-        assert inputs[0].dtype == inputs[1].dtype == output.dtype
+        if inputs[0].dtype != inputs[1].dtype or inputs[0].dtype != output.dtype:
+            raise TypeError(
+                f"All IO needs to have the same data type, got input 1: "
+                f"{inputs[0].dtype}, input 2: {inputs[1].dtype} and output: "
+                f"{output.dtype}"
+            )
         # Handle int8 (quantized) and int32
-        assert inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]
+        supported_dtypes = [ts.DType.INT8, ts.DType.INT32]
+        if inputs[0].dtype not in supported_dtypes:
+            raise TypeError(
+                f'IO data type needs to be {supported_dtypes}, got "{inputs[0].dtype}"'
+            )

         dim_order = (
             inputs[0].dim_order
@@ -105,15 +114,22 @@ def define_node(
     ) -> None:
         # Specification (0.80) states that input and output types
         # should all be the same
-        assert inputs[0].dtype == inputs[1].dtype == output.dtype
+        if inputs[0].dtype != inputs[1].dtype or inputs[0].dtype != output.dtype:
+            raise TypeError(
+                f"All IO needs to have the same data type, got input 1: "
+                f"{inputs[0].dtype}, input 2: {inputs[1].dtype} and output: "
+                f"{output.dtype}"
+            )

         if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
             # Call the inherited define_node for handling integers
             super().define_node(node, tosa_graph, inputs, output)
         else:
             # FP32 Add lowering
-            assert inputs[0].dtype == ts.DType.FP32
-            assert output.dtype == ts.DType.FP32
+            if inputs[0].dtype != ts.DType.FP32:
+                raise TypeError(
+                    f"Expected IO data type to be FP32, got {inputs[0].dtype}"
+                )

             input1, input2 = tutils.reshape_for_broadcast(tosa_graph, inputs)
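
The same assert-to-exception pattern is applied to op_sigmoid.py and op_tanh.py below. Besides the richer error messages, one practical consideration is that assert statements are stripped when Python runs with the -O flag, so they cannot be relied on to validate user-provided graphs, while an explicit raise always fires. A tiny standalone illustration (the dtype strings are placeholders, not real ts.DType members):

dtype_a, dtype_b = "INT8", "FP32"  # placeholders standing in for ts.DType values

# Under `python -O`, `assert dtype_a == dtype_b` compiles away to nothing,
# so the mismatch would go unnoticed. An explicit check always runs:
try:
    if dtype_a != dtype_b:
        raise TypeError(
            f"All IO needs to have the same data type, got {dtype_a} and {dtype_b}"
        )
except TypeError as e:
    print(e)  # All IO needs to have the same data type, got INT8 and FP32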

backends/arm/operators/op_erf.py

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
# Copyright 2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# pyre-unsafe
from typing import List

import serializer.tosa_serializer as ts  # type: ignore
import torch.fx
from executorch.backends.arm.operators.node_visitor import (
    NodeVisitor,
    register_node_visitor,
)
from executorch.backends.arm.tosa_mapping import TosaArg
from executorch.backends.arm.tosa_specification import TosaSpecification
from serializer.tosa_serializer import TosaOp


@register_node_visitor
class ERFVisitor_080_MI(NodeVisitor):
    target = "aten.erf.default"

    # BI case handled by op_table
    tosa_specs = [TosaSpecification.create_from_string("TOSA-0.80+MI")]

    def __init__(self, *args):
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        tosa_graph: ts.TosaSerializer,
        inputs: List[TosaArg],
        output: TosaArg,
    ) -> None:
        if not (inputs[0].dtype == output.dtype):
            raise ValueError(
                "All inputs and output need same dtype."
                f"Got {inputs[0].dtype=}, {output.dtype=}"
            )
        if not (inputs[0].dtype == ts.DType.FP32):
            raise ValueError("All inputs need to be FP32." f"Got {inputs[0].dtype=}")
        # MI lowering
        tosa_graph.addOperator(TosaOp.Op().ERF, [inputs[0].name], [output.name])
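
For context, erf is the Gauss error function, erf(x) = 2/sqrt(pi) * ∫₀ˣ exp(-t²) dt. Per the comment in the visitor, the floating-point MI profile lowers it directly to the TOSA ERF operator, while the quantized BI case goes through the table lowering added above. A quick check that torch.erf matches the math module's reference implementation:

import math
import torch

x = torch.tensor([0.0, 0.5, 1.0])
print(torch.erf(x))                                  # tensor([0.0000, 0.5205, 0.8427])
print([round(math.erf(v), 4) for v in x.tolist()])   # [0.0, 0.5205, 0.8427]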

backends/arm/operators/op_sigmoid.py

Lines changed: 9 additions & 2 deletions
@@ -36,7 +36,14 @@ def define_node(
         output: TosaArg,
     ) -> None:

-        assert len(node.all_input_nodes) == 1
-        assert inputs[0].dtype == output.dtype == ts.DType.FP32
+        if len(node.all_input_nodes) != 1:
+            raise ValueError(
+                f"Expected 1 input for {self.target}, got {len(node.all_input_nodes)}"
+            )
+        if inputs[0].dtype != ts.DType.FP32 or output.dtype != ts.DType.FP32:
+            raise ValueError(
+                f"Input and output for {self.target} need to be FP32, got input_dtype: "
+                f"{inputs[0].dtype} and output_dtype: {output.dtype}"
+            )

         tosa_graph.addOperator(TosaOp.Op().SIGMOID, [inputs[0].name], [output.name])

backends/arm/operators/op_tanh.py

Lines changed: 10 additions & 1 deletion
@@ -34,5 +34,14 @@ def define_node(
         inputs: List[TosaArg],
         output: TosaArg,
     ) -> None:
-        assert inputs[0].dtype == output.dtype == ts.DType.FP32
+        if len(node.all_input_nodes) != 1:
+            raise ValueError(
+                f"Expected 1 input for {self.target}, got {len(node.all_input_nodes)}"
+            )
+        if inputs[0].dtype != ts.DType.FP32 or output.dtype != ts.DType.FP32:
+            raise ValueError(
+                f"Input and output for {self.target} need to be FP32, got input_dtype: "
+                f"{inputs[0].dtype} and output_dtype: {output.dtype}"
+            )
+
         tosa_graph.addOperator(TosaOp.Op().TANH, [inputs[0].name], [output.name])

backends/arm/quantizer/quantization_annotator.py

Lines changed: 3 additions & 2 deletions
@@ -164,6 +164,7 @@ def _match_pattern(
 _one_to_one = [
     torch.ops.aten.abs.default,
     torch.ops.aten.ceil.default,
+    torch.ops.aten.erf.default,
     torch.ops.aten.exp.default,
     torch.ops.aten.floor.default,
     torch.ops.aten.log.default,
@@ -217,6 +218,8 @@ def _match_pattern(
     torch.ops.aten.pad.default,
     torch.ops.aten.amax.default,
     torch.ops.aten.amin.default,
+    torch.ops.aten.clamp.default,
+    torch.ops.aten.clamp.Tensor,
 ]

 # Operators that can inherit the quantization specs from its parent node
@@ -236,8 +239,6 @@ def _match_pattern(
     torch.ops.aten.flatten.using_ints,
     torch.ops.aten.dropout.default,
     torch.ops.aten.dropout_.default,
-    torch.ops.aten.clamp.default,
-    torch.ops.aten.clamp.Tensor,
     torch.ops.aten.where,
     operator.getitem,
 ]
