
Commit 0a08ed5

Merge branch 'main' into remove_FULL_node_visitor
2 parents: 326ea87 + 302cb06


51 files changed (+2496 −746 lines)

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-7ae0ce6360b6e4f944906502d20da24c04debee5
+59d5cf083b4f860dea76fe8936076177f9367f10

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@
 from .decompose_batchnorm_pass import DecomposeBatchNormPass  # noqa
 from .decompose_div_pass import DecomposeDivPass  # noqa
 from .decompose_layernorm_pass import DecomposeLayerNormPass  # noqa
+from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass  # noqa
 from .decompose_linear_pass import DecomposeLinearPass  # noqa
 from .decompose_meandim_pass import DecomposeMeanDimPass  # noqa
 from .decompose_select import DecomposeSelectPass  # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 3 additions & 0 deletions
@@ -26,6 +26,7 @@
     DecomposeBatchNormPass,
     DecomposeDivPass,
     DecomposeLayerNormPass,
+    DecomposeLeakyReLUPass,
     DecomposeLinearPass,
     DecomposeMeanDimPass,
     DecomposeSelectPass,
@@ -121,6 +122,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule
         self.add_pass(FuseBatchnorm2DPass(exported_program))
         self.add_pass(ConvertMmToBmmPass())
         self.add_pass(DecomposeLinearPass())
+        self.add_pass(DecomposeLeakyReLUPass())
         self.add_pass(DecomposeBatchNormPass())
         self.add_pass(DecomposeLayerNormPass())
         self.add_pass(DecomposeVarPass())
@@ -178,6 +180,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeVarPass())
         self.add_pass(DecomposeMeanDimPass())
         self.add_pass(DecomposeDivPass())
+        self.add_pass(DecomposeLeakyReLUPass())

         if isinstance(self.tosa_spec, Tosa_0_80) and self.tosa_spec.is_U55_subset:
             # Numerically stable softmax uses amax which is not supported on Ethos-U55
backends/arm/_passes/decompose_leaky_relu_pass.py

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import torch
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.dialects._ops import ops as exir_ops
+
+edge_ops = (exir_ops.edge.aten.leaky_relu.default,)
+torch_ops = (torch.ops.aten.leaky_relu.default,)
+
+
+def _get_leaky_relu_ops(op) -> tuple:
+    if op in edge_ops:
+        return (
+            exir_ops.edge.aten.clamp.default,
+            exir_ops.edge.aten.full.default,
+            exir_ops.edge.aten.mul.Tensor,
+            exir_ops.edge.aten.add.Tensor,
+        )
+    elif op in torch_ops:
+        return (
+            torch.ops.aten.clamp.default,
+            torch.ops.aten.full.default,
+            torch.ops.aten.mul.Tensor,
+            torch.ops.aten.add.Tensor,
+        )
+    else:
+        raise RuntimeError(f"Can't get decomposition ops for op {op}")
+
+
+class DecomposeLeakyReLUPass(ArmPass):
+    """
+    This pass decomposes Leaky ReLU into primitive operations.
+    LeakyReLU(x, slope) = max(0, x) + slope * min(0, x)
+
+    Example:
+        %op1 = clamp(x, 0, None)     (equivalent to max(0, x))
+        %op2 = clamp(x, None, 0)     (equivalent to min(0, x))
+        %op3 = full(x.shape, slope)
+        %op4 = mul(%op3, %op2)
+        %op5 = add(%op1, %op4)
+    """
+
+    def call_operator(self, op, args, kwargs, meta):
+        if op not in (edge_ops + torch_ops):
+            return super().call_operator(op, args, kwargs, meta)
+
+        x = args[0]
+        slope = args[1] if len(args) > 1 else 0.01
+        dtype = x.node.meta["val"].dtype
+        clamp, full, mul, add = _get_leaky_relu_ops(op)
+        op1 = super().call_operator(
+            op=clamp, args=(x, 0, None), kwargs=kwargs, meta=meta
+        )
+        op2 = super().call_operator(
+            op=clamp, args=(x, None, 0), kwargs=kwargs, meta=meta
+        )
+        op3 = super().call_operator(
+            op=full,
+            args=(x.node.meta["val"].shape, slope),
+            kwargs={"dtype": dtype},
+            meta=meta,
+        )
+        op4 = super().call_operator(op=mul, args=(op3, op2), kwargs=kwargs, meta=meta)
+        op5 = super().call_operator(op=add, args=(op1, op4), kwargs=kwargs, meta=meta)
+        return op5
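
The identity this pass relies on is easy to sanity-check in plain PyTorch. A minimal sketch (independent of the pass machinery) verifying the decomposition from the docstring:

import torch

# LeakyReLU(x, slope) = max(0, x) + slope * min(0, x)
x = torch.randn(2, 3)
slope = 0.2
decomposed = torch.clamp(x, min=0) + torch.full(x.shape, slope) * torch.clamp(x, max=0)
reference = torch.nn.functional.leaky_relu(x, negative_slope=slope)
assert torch.allclose(decomposed, reference)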

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 2 additions & 0 deletions
@@ -193,6 +193,7 @@ def is_node_supported(
            exir_ops.edge.aten.repeat.default,
            exir_ops.edge.aten.reciprocal.default,
            exir_ops.edge.aten.relu.default,
+           exir_ops.edge.aten.leaky_relu.default,
            exir_ops.edge.aten.rsqrt.default,
            exir_ops.edge.aten._softmax.default,
            exir_ops.edge.aten.select_copy.int,
@@ -258,6 +259,7 @@ def is_node_supported(
            exir_ops.edge.aten.sub.Scalar,
            exir_ops.edge.aten.mul.Scalar,
            exir_ops.edge.aten.div.Scalar,
+           exir_ops.edge.aten.leaky_relu.default,
        ]
        if needs_decomp:
            self.reporter.report_reject(node, "Needs to be decomposed.")

backends/arm/quantizer/quantization_annotator.py

Lines changed: 2 additions & 2 deletions
@@ -218,6 +218,8 @@ def _match_pattern(
     torch.ops.aten.pad.default,
     torch.ops.aten.amax.default,
     torch.ops.aten.amin.default,
+    torch.ops.aten.clamp.default,
+    torch.ops.aten.clamp.Tensor,
 ]

 # Operators that can inherit the quantization specs from its parent node
@@ -237,8 +239,6 @@ def _match_pattern(
     torch.ops.aten.flatten.using_ints,
     torch.ops.aten.dropout.default,
     torch.ops.aten.dropout_.default,
-    torch.ops.aten.clamp.default,
-    torch.ops.aten.clamp.Tensor,
     torch.ops.aten.where,
     operator.getitem,
 ]

backends/arm/test/models/test_conformer.py

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@ class TestConformer(unittest.TestCase):
     # .to_executorch step, i.e. after Arm partitioner.
     ops_after_partitioner = {
         "executorch_exir_dialects_edge__ops_aten_max_default": 1,
-        "torch.ops.aten._assert_scalar.default": 10,
+        "torch.ops.aten._assert_scalar.default": 7,
         "torch.ops.aten._local_scalar_dense.default": 1,
     }

backends/arm/test/models/test_llama.py

Lines changed: 2 additions & 1 deletion
@@ -11,6 +11,7 @@
 import sys
 import unittest

+import pytest
 import torch

 from executorch.backends.arm.test import common, conftest
@@ -102,7 +103,7 @@ def test_llama_tosa_MI(self):
         llama_model, llama_inputs, llama_meta = self.prepare_model()

         if llama_model is None and llama_inputs is None and llama_meta is None:
-            return
+            pytest.skip("Missing model and/or input files")

         with torch.no_grad():
             (
backends/arm/test/ops/test_leaky_relu.py

Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,88 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+aten_op = "torch.ops.aten.leaky_relu.default"
+exir_op = "executorch_exir_dialects_edge__ops_aten_leaky_relu_default"
+input_t1 = Tuple[torch.Tensor]  # Input x
+
+
+class LeakyReLU(torch.nn.Module):
+    def __init__(self, slope: float = 0.01):
+        super().__init__()
+        self.activation = torch.nn.LeakyReLU(slope)
+
+    def forward(self, x: torch.Tensor):
+        return self.activation(x)
+
+    test_data: dict[str, input_t1] = {
+        "zeros": ((torch.zeros(1, 1, 5, 5),), 0.01),
+        "ones": ((torch.ones(1, 32, 112, 112),), 0.01),
+        "rand": ((torch.rand(1, 96, 56, 56),), 0.2),
+        "3Dtensor": ((torch.rand(5, 5, 5),), 0.001),
+        "negative_slope": ((torch.rand(1, 16, 128, 128),), -0.002),
+    }
+
+
+@common.parametrize("test_data", LeakyReLU.test_data)
+def test_leaky_relu_tosa_MI(test_data):
+    data, slope = test_data
+    pipeline = TosaPipelineMI[input_t1](
+        LeakyReLU(slope), data, [], use_to_edge_transform_and_lower=True
+    )
+    pipeline.add_stage_after(
+        "to_edge_transform_and_lower", pipeline.tester.check_not, [exir_op]
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", LeakyReLU.test_data)
+def test_leaky_relu_tosa_BI(test_data):
+    data, slope = test_data
+    pipeline = TosaPipelineBI[input_t1](
+        LeakyReLU(slope), data, [], use_to_edge_transform_and_lower=True
+    )
+    pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
+    pipeline.run()
+
+
+@common.parametrize("test_data", LeakyReLU.test_data)
+@common.XfailIfNoCorstone300
+def test_leaky_relu_u55_BI(test_data):
+    data, slope = test_data
+    pipeline = EthosU55PipelineBI[input_t1](
+        LeakyReLU(slope),
+        data,
+        [],
+        run_on_fvp=True,
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
+    pipeline.run()
+
+
+@common.parametrize("test_data", LeakyReLU.test_data)
+@common.XfailIfNoCorstone320
+def test_leaky_relu_u85_BI(test_data):
+    data, slope = test_data
+    pipeline = EthosU85PipelineBI[input_t1](
+        LeakyReLU(slope),
+        data,
+        [],
+        run_on_fvp=True,
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op])
+    pipeline.run()
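
Assuming the file path inferred above, the TOSA variants of these tests can be run locally with something like:

pytest backends/arm/test/ops/test_leaky_relu.py -k tosa

The Ethos-U55/U85 variants additionally require the Corstone-300/320 FVPs, as the XfailIfNoCorstone markers indicate.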

backends/arm/test/ops/test_mm.py

Lines changed: 3 additions & 0 deletions
@@ -6,6 +6,7 @@

 from typing import Callable

+import pytest
 import torch
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
@@ -53,6 +54,7 @@ def test_mm_tosa_u55(test_data_generator: Callable[[], tuple]):


 @parameterized.expand(MM.test_data_generators)
+@pytest.mark.flaky  # Investigate flakiness (MLETORCH-870)
 def test_mm_tosa_u85(test_data_generator: Callable[[], tuple]):
     test_data = test_data_generator()
     EthosU85PipelineBI[test_t](MM(), test_data, MM.aten_op, MM.exir_op).run()
@@ -67,6 +69,7 @@ def test_mm_tosa_u55_on_fvp(test_data_generator: Callable[[], tuple]):

 @parameterized.expand(MM.test_data_generators)
 @common.SkipIfNoCorstone320
+@pytest.mark.flaky  # Investigate flakiness (MLETORCH-870)
 def test_mm_tosa_u85_on_fvp(test_data_generator: Callable[[], tuple]):
     test_data = test_data_generator()
     EthosU85PipelineBI[test_t](

backends/arm/test/test_model.py

Lines changed: 3 additions & 0 deletions
@@ -176,6 +176,7 @@ def build_ethosu_runtime(
     pte_file: str,
     target: str,
     system_config: str,
+    memory_mode: str,
     extra_flags: str,
     elf_build_path: str,
 ):
@@ -195,6 +196,7 @@ def build_ethosu_runtime(
         f"--target={target}",
         "--build_type=Release",
         f"--system_config={system_config}",
+        f"--memory_mode={memory_mode}",
         extra_build_flag,
         f"--output={elf_build_path}",
     ]
@@ -256,6 +258,7 @@ def run_elf_with_fvp(script_path: str, elf_file: str, target: str):
         pte_file,
         args.target,
         args.system_config,
+        args.memory_mode,
         args.extra_flags,
         elf_build_path,
     )

backends/arm/tosa_backend.py

Lines changed: 6 additions & 4 deletions
@@ -75,12 +75,14 @@ def preprocess(  # noqa: C901
             input_order = list(map(int, spec.value.decode().split(",")))

     # Check that the output format is set correctly in the compile spec
-    assert output_format == "tosa", "output format must be tosa"
+    if output_format != "tosa":
+        raise ValueError(f'Invalid output format {output_format}, must be "tosa"')

     tosa_spec = get_tosa_spec(compile_spec)
-    assert (
-        tosa_spec is not None
-    ), "TOSA backend needs a TOSA version specified in the CompileSpec!"
+    if tosa_spec is None:
+        raise ValueError(
+            "TOSA backend needs a TOSA version specified in the CompileSpec"
+        )

     logger.info(f"Converting ExportedProgram to TOSA: {tosa_spec}")
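
The switch from assert to an explicit raise is more than style: assert statements are stripped when Python runs with optimizations enabled (python -O), so the validation would silently disappear. A minimal sketch of the difference (check_output_format is a hypothetical helper, for illustration only):

def check_output_format(output_format: str) -> None:
    # Unlike an assert, this check survives `python -O`.
    if output_format != "tosa":
        raise ValueError(f'Invalid output format {output_format}, must be "tosa"')

check_output_format("tosa")  # passes
# check_output_format("vgf")  # raises ValueError, even under `python -O`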

backends/cadence/aot/functions_fusion_g3.yaml

Lines changed: 5 additions & 0 deletions
@@ -171,6 +171,11 @@
   kernels:
     - arg_meta: null
       kernel_name: cadence::impl::G3::exp_out
+
+- op: hardtanh.out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::G3::hardtanh_out

 # custom ops
 - func: cadence::quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)

backends/cadence/fusion_g3/operators/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -50,6 +50,7 @@ set(_aten_ops__srcs
     "${CMAKE_CURRENT_SOURCE_DIR}/op_lt.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/op_where.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/op_clamp.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/op_hardtanh.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_div.cpp"
