pytorch
diff --git a/‎.ci/scripts/gather_benchmark_configs.py
Lines changed: 1 addition & 0 deletions b/‎.ci/scripts/gather_benchmark_configs.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/android-perf-private-device-experiment.yml
Lines changed: 62 additions & 0 deletions b/‎.github/workflows/android-perf-private-device-experiment.yml
Lines changed: 62 additions & 0 deletions
diff --git a/‎.github/workflows/android-release-artifacts.yml
Lines changed: 2 additions & 1 deletion b/‎.github/workflows/android-release-artifacts.yml
Lines changed: 2 additions & 1 deletion
diff --git a/‎backends/arm/_passes/__init__.py
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/arm_pass_manager.py
Lines changed: 2 additions & 0 deletions b/‎backends/arm/_passes/arm_pass_manager.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎backends/arm/_passes/decompose_silu_pass.py
Lines changed: 34 additions & 0 deletions b/‎backends/arm/_passes/decompose_silu_pass.py
Lines changed: 34 additions & 0 deletions
diff --git a/‎backends/arm/_passes/match_arg_ranks_pass.py
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/match_arg_ranks_pass.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/replace_scalar_with_tensor_pass.py
Lines changed: 2 additions & 0 deletions b/‎backends/arm/_passes/replace_scalar_with_tensor_pass.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎backends/arm/operator_support/ethos_u55_support.py
Lines changed: 1 addition & 0 deletions b/‎backends/arm/operator_support/ethos_u55_support.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/operator_support/tosa_supported_operators.py
Lines changed: 2 additions & 0 deletions b/‎backends/arm/operator_support/tosa_supported_operators.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎backends/arm/operators/__init__.py
Lines changed: 1 addition & 1 deletion b/‎backends/arm/operators/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/arm/operators/op_get_item.py
Lines changed: 0 additions & 35 deletions b/‎backends/arm/operators/op_get_item.py
Lines changed: 0 additions & 35 deletions
diff --git a/‎backends/arm/operators/ops_identity.py
Lines changed: 47 additions & 0 deletions b/‎backends/arm/operators/ops_identity.py
Lines changed: 47 additions & 0 deletions
diff --git a/‎backends/arm/quantizer/arm_quantizer.py
Lines changed: 2 additions & 2 deletions b/‎backends/arm/quantizer/arm_quantizer.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/arm/quantizer/quantization_annotator.py
Lines changed: 6 additions & 1 deletion b/‎backends/arm/quantizer/quantization_annotator.py
Lines changed: 6 additions & 1 deletion
@@ -23,6 +23,7 @@
     "samsung_galaxy_s22": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa",
     "samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",
     "google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",
+    "google_pixel_3_private_rooted": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",
 }
 
 # Predefined benchmark configurations
 
@@ -0,0 +1,62 @@
+name: android-perf (private devices)
+
+on:
+  schedule:
+    - cron: 0 0,4,8,12,16,20 * * *
+  pull_request:
+    paths:
+      - .github/workflows/android-perf-private-device-experiment.yml
+  push:
+    branches:
+      - main
+    paths:
+      - .github/workflows/android-perf-private-device-experiment.yml
+  # Note: GitHub has an upper limit of 10 inputs
+  workflow_dispatch:
+    inputs:
+      models:
+        description: Models to be benchmarked
+        required: false
+        type: string
+        default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8
+      devices:
+        description: Target devices to run benchmark
+        required: false
+        type: string
+        default: google_pixel_3_private_rooted
+      benchmark_configs:
+        description: The list of configs used the benchmark
+        required: false
+        type: string
+  workflow_call:
+    inputs:
+      models:
+        description: Models to be benchmarked
+        required: false
+        type: string
+        default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8
+      devices:
+        description: Target devices to run benchmark
+        required: false
+        type: string
+        default: google_pixel_3_private_rooted
+      benchmark_configs:
+        description: The list of configs used the benchmark
+        required: false
+        type: string
+
+concurrency:
+  group: android-perf-private-devices-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+jobs:
+  # Single job that delegates to the reusable android-perf workflow.
+  android:
+    uses: ./.github/workflows/android-perf.yml
+    secrets: inherit
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      models: ${{ inputs.models }}
+      # NOTE(review): the device is pinned to a literal here, so the
+      # `devices` input declared under workflow_dispatch / workflow_call is
+      # not forwarded. Confirm this is intentional for the experiment.
+      devices: google_pixel_3_private_rooted
+      benchmark_configs: ${{ inputs.benchmark_configs }}
@@ -49,7 +49,8 @@ jobs:
       contents: read
     with:
       secrets-env: EXECUTORCH_MAVEN_SIGNING_KEYID EXECUTORCH_MAVEN_SIGNING_PASSWORD EXECUTORCH_MAVEN_CENTRAL_PASSWORD EXECUTORCH_MAVEN_CENTRAL_USERNAME EXECUTORCH_MAVEN_SIGNING_GPG_KEY_CONTENTS
-      runner: linux.2xlarge
+      # As this job has access to Maven credential, run this on a fresh ephemeral runner
+      runner: ephemeral.linux.2xlarge
       docker-image: executorch-ubuntu-22.04-clang12-android
       submodules: 'recursive'
       ref: ${{ github.sha }}
 
@@ -26,6 +26,7 @@
 from .decompose_linear_pass import DecomposeLinearPass  # noqa
 from .decompose_meandim_pass import DecomposeMeanDimPass  # noqa
 from .decompose_select import DecomposeSelectPass  # noqa
+from .decompose_silu_pass import DecomposeSiluPass  # noqa
 from .decompose_softmax_pass import DecomposeSoftmaxPass  # noqa
 from .decompose_softmax_unstable_pass import DecomposeSoftmaxUnstablePass  # noqa
 from .decompose_sqrt_pass import DecomposeSqrtPass  # noqa
 
@@ -31,6 +31,7 @@
     DecomposeLinearPass,
     DecomposeMeanDimPass,
     DecomposeSelectPass,
+    DecomposeSiluPass,
     DecomposeSoftmaxPass,
     DecomposeSoftmaxUnstablePass,
     DecomposeSqrtPass,
@@ -196,6 +197,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeDivPass())
         self.add_pass(DecomposeLeakyReLUPass())
         self.add_pass(DecomposeSqrtPass())
+        self.add_pass(DecomposeSiluPass())
 
         if isinstance(self.tosa_spec, Tosa_0_80) and self.tosa_spec.is_U55_subset:
             # Numerically stable softmax uses amax which is not supported on Ethos-U55
 
@@ -0,0 +1,34 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import torch
+from executorch.exir.pass_base import ExportPass
+
+aten_silu_ops = (torch.ops.aten.silu.default, torch.ops.aten.silu_.default)
+
+
+class DecomposeSiluPass(ExportPass):
+    """
+    This pass decomposes silu into a mul and a sigmoid node.
+
+    Example:
+        y = silu(a)
+    Becomes:
+        x = sigmoid(a)
+        y = mul(a,x)
+    """
+
+    # Rewrites calls to the ops listed in aten_silu_ops; every other operator
+    # is handed to the parent implementation untouched.
+    def call_operator(self, op, args, kwargs, meta):
+        # Guard: only the silu overloads in aten_silu_ops are decomposed.
+        if op not in (aten_silu_ops):
+            return super().call_operator(op, args, kwargs, meta)
+        sigmoid_op = torch.ops.aten.sigmoid.default
+        mul_op = torch.ops.aten.mul.Tensor
+
+        # The first positional argument is the value silu was applied to.
+        original = args[0]
+        # The replacement ops are emitted with empty kwargs (the incoming
+        # kwargs are not forwarded) and reuse the original call's meta.
+        sigmoid = super().call_operator(sigmoid_op, (original,), {}, meta)
+
+        # silu(a) == a * sigmoid(a): the mul result stands in for the silu.
+        return super().call_operator(mul_op, (original, sigmoid), {}, meta)
@@ -49,6 +49,7 @@ def __init__(self, exported_program):
         exir_ops.edge.aten.bitwise_left_shift.Tensor,
         exir_ops.edge.aten.eq.Tensor,
         exir_ops.edge.aten.gt.Tensor,
+        exir_ops.edge.aten.ge.Tensor,
         exir_ops.edge.aten.lt.Tensor,
         exir_ops.edge.aten.pow.Tensor_Tensor,
         exir_ops.edge.aten.where.self,
 
@@ -27,6 +27,7 @@
     exir_ops.edge.aten.__lshift__.Scalar: exir_ops.edge.aten.bitwise_left_shift.Tensor,
     exir_ops.edge.aten.eq.Scalar: exir_ops.edge.aten.eq.Tensor,
     exir_ops.edge.aten.gt.Scalar: exir_ops.edge.aten.gt.Tensor,
+    exir_ops.edge.aten.ge.Scalar: exir_ops.edge.aten.ge.Tensor,
     exir_ops.edge.aten.lt.Scalar: exir_ops.edge.aten.lt.Tensor,
     torch.ops.aten.add.Scalar: torch.ops.aten.add.Tensor,
     torch.ops.aten.sub.Scalar: torch.ops.aten.sub.Tensor,
@@ -36,6 +37,7 @@
     torch.ops.aten.__lshift__.Scalar: torch.ops.aten.bitwise_left_shift.Tensor,
     torch.ops.aten.eq.Scalar: torch.ops.aten.eq.Tensor,
     torch.ops.aten.gt.Scalar: torch.ops.aten.gt.Tensor,
+    torch.ops.aten.ge.Scalar: torch.ops.aten.ge.Tensor,
     torch.ops.aten.lt.Scalar: torch.ops.aten.lt.Tensor,
 }
 
 
@@ -134,6 +134,7 @@ class EthosU55NotSupported(OperatorSupportBase):
         exir_ops.edge.aten.eq.Tensor,
         exir_ops.edge.aten.eq.Scalar,
         exir_ops.edge.aten.ge.Tensor,
+        exir_ops.edge.aten.ge.Scalar,
         exir_ops.edge.aten.gt.Tensor,
         exir_ops.edge.aten.gt.Scalar,
         exir_ops.edge.aten.le.Tensor,
 
@@ -178,6 +178,7 @@ def is_node_supported(
             exir_ops.edge.aten.full.default,
             exir_ops.edge.aten.full_like.default,
             exir_ops.edge.aten.ge.Tensor,
+            exir_ops.edge.aten.ge.Scalar,
             exir_ops.edge.aten.gt.Tensor,
             exir_ops.edge.aten.gt.Scalar,
             exir_ops.edge.aten.le.Tensor,
@@ -228,6 +229,7 @@ def is_node_supported(
             exir_ops.edge.aten.__lshift__.Scalar,
             torch.ops.aten.scalar_tensor.default,
             exir_ops.edge.aten.gelu.default,
+            exir_ops.edge.aten.alias_copy.default,
         ]
 
         return supported
 
@@ -22,7 +22,6 @@
     op_erf,
     op_exp,
     op_ge,
-    op_get_item,
     op_gt,
     op_le,
     op_log,
@@ -51,5 +50,6 @@
     op_view,
     op_where,
     ops_binary,
+    ops_identity,
     ops_unary,
 )
@@ -0,0 +1,47 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+from typing import List
+
+import torch
+import torch.fx
+
+import tosa_tools.v0_80.serializer.tosa_serializer as ts
+
+from executorch.backends.arm.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.arm.tosa_mapping import TosaArg
+
+
+def identity_operator_factory(identity_target: str):
+    """
+    Creates and registers NodeVisitors for operators that map directly
+    to a TOSA IDENTITY op.
+    """
+
+    # A fresh visitor class is defined on every call, so each target string
+    # gets its own class with its own `target` attribute.
+    class IdentityOperatorVisitor(NodeVisitor):
+        # NOTE(review): presumably register_node_visitor uses this attribute
+        # to associate the visitor with the operator; confirm in node_visitor.
+        target = identity_target
+
+        # Emits one IDENTITY operator from the first input to the output.
+        # `node` and any inputs after the first are not used.
+        def define_node(
+            self,
+            node: torch.fx.Node,
+            tosa_graph: ts.TosaSerializer,
+            inputs: List[TosaArg],
+            output: TosaArg,
+        ) -> None:
+            # Simply add an identityOp
+            tosa_graph.addOperator(
+                ts.TosaOp.Op().IDENTITY, [inputs[0].name], [output.name]
+            )
+
+    # Registration is the factory's only effect; nothing is returned.
+    register_node_visitor(IdentityOperatorVisitor)
+
+
+identity_operator_factory("getitem")
+identity_operator_factory("aten.alias_copy.default")
@@ -286,10 +286,10 @@ def _annotate_all_static_patterns(
         quantization_config: Optional[QuantizationConfig],
         filter_fn: Optional[Callable[[Node], bool]] = None,
     ) -> GraphModule:
-        """Loops over all STATIC_OPS and runs the corresponding registred annotator.
+        """Loops over all STATIC_OPS and runs the corresponding registered annotator.
         Args:
             model: The model to annotate statically.
-            quantization_config: Specifices the QuantizationSpecs for the model's
+            quantization_config: Specifies the QuantizationSpecs for the model's
                 input activations, output activations, weights and biases.
             filter_fn: An optional filter function that takes a node and returns whether the node should be annotated.
         Returns:
 
@@ -244,6 +244,11 @@ def _match_pattern(
     operator.getitem,
 ]
 
+_one_to_one_shared_input_or_input_act_qspec = [
+    torch.ops.aten.adaptive_avg_pool2d.default,
+    torch.ops.aten.alias_copy.default,
+]
+
 
 def get_quant_properties(  # noqa: C901
     node: Node, gm: torch.fx.GraphModule, quantization_config
@@ -332,7 +337,7 @@ def any_or_hardtanh_min_zero(n: Node):
             _QuantProperty(2, shared_qspec),  # type: ignore[arg-type]
         ]
         quant_properties.quant_output = _QuantProperty(0, shared_qspec)  # type: ignore[arg-type]
-    elif node.target == torch.ops.aten.adaptive_avg_pool2d.default:
+    elif node.target in _one_to_one_shared_input_or_input_act_qspec:
         input_qspec = (
             SharedQuantizationSpec(node.args[0])  # type: ignore[arg-type]
             if arm_quantizer_utils.is_output_annotated(node.args[0])  # type: ignore
Original file line number	Diff line number	Diff line change
`@@ -23,6 +23,7 @@`
`23`	`23`	`"samsung_galaxy_s22": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa",`
`24`	`24`	`"samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",`
`25`	`25`	`"google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",`
	`26`	`+ "google_pixel_3_private_rooted": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",`
`26`	`27`	`}`
`27`	`28`
`28`	`29`	`# Predefined benchmark configurations`