Fix ANE llama export #8904

Merged: 13 commits merged on Mar 4, 2025
Changes from 6 commits
51 changes: 51 additions & 0 deletions .ci/scripts/test_ane_static_llama.sh
@@ -0,0 +1,51 @@
#!/bin/bash
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
PYTHON_EXECUTABLE=python3
fi

which "${PYTHON_EXECUTABLE}"

pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama

# Download stories110M artifacts
download_stories_model_artifacts

python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w


python run.py -m model.pte -t tokenizer.model --prompt "Once upon a time," --temperature 0.0 &> tmp.txt
tail -n +6 tmp.txt &> output.txt

cat output.txt

printf 'Once upon a time,there was a little girl named L ily . She loved to play outside in the sun sh ine . One day , she saw ' &> expected.txt


if diff output.txt expected.txt > /dev/null; then
echo "Output matches."
else
echo "Output does not match."
echo -e "\n\nExpected:"
cat expected.txt

echo -e "\n\nGot:"
cat output.txt

echo -e "\n\nDiff:"
diff output.txt expected.txt
exit 1
fi

popd
32 changes: 32 additions & 0 deletions .github/workflows/pull.yml
@@ -495,6 +495,38 @@ jobs:
# Test static llama weight sharing and accuracy
PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh

test-static-llama-ane:
name: test-static-llama-ane
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
Contributor: I thought all the mac jobs are in trunk because of the cost/reliability issue?

Contributor: Is it true? Maybe just rebase?

Contributor: See #610, not sure if it's still true

Contributor: Yeah, macos_job ideally should be in trunk.yml and can be run on a PR using ciflow/trunk. The job could be here if it's important enough to justify running on every pull request. I assume that this is not the case here.

Contributor (Author): Ok, I moved the job to trunk.yml

with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
set -eux
bash .ci/scripts/setup-conda.sh
eval "$(conda shell.bash hook)"

# Setup MacOS dependencies as there is no Docker support on MacOS atm
PYTHON_EXECUTABLE=python \
EXECUTORCH_BUILD_PYBIND=ON \
CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
${CONDA_RUN} --no-capture-output \
.ci/scripts/setup-macos.sh --build-tool cmake --build-mode Debug --editable false


# Install llama3_2_vision dependencies.
PYTHON_EXECUTABLE=python \
${CONDA_RUN} --no-capture-output \
./examples/models/llama3_2_vision/install_requirements.sh

# Install coreml
sh ./backends/apple/coreml/scripts/install_requirements.sh

# Test ANE llama
sh .ci/scripts/test_ane_static_llama.sh

test-qnn-models-linux:
name: test-qnn-models-linux
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1 change: 1 addition & 0 deletions examples/apple/coreml/llama/export.py
@@ -203,6 +203,7 @@ def main() -> None:
torch.ops.aten.scaled_dot_product_attention.default,
# preserve norm op for numerical stability
torch.ops.aten.linalg_vector_norm.default,
torch.ops.aten.reciprocal.default,
],
compile_config=EdgeCompileConfig(
_check_ir_validity=False,
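The new entry keeps torch.ops.aten.reciprocal.default from being decomposed, since the rewritten RMSNorm in llama_transformer.py (below) now calls torch.reciprocal explicitly. A minimal sketch of the idea, not part of the PR (the toy module, dim, and input shape are made up for illustration): export the rewritten norm with torch.export and check that the reciprocal and vector_norm ops appear as call_function nodes in the graph, which is what the preserve list is meant to keep intact through edge lowering.

import torch


class TinyRMSNorm(torch.nn.Module):
    # Hypothetical toy module mirroring the rewritten _norm formulation.
    def __init__(self, dim: int):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        return (
            x
            * torch.sqrt(torch.tensor(self.dim, dtype=x.dtype))
            * torch.reciprocal(torch.linalg.vector_norm(x, dim=-1, keepdim=True))
        )


ep = torch.export.export(TinyRMSNorm(64), (torch.randn(1, 8, 64),))
targets = {n.target for n in ep.graph.nodes if n.op == "call_function"}
# The ops named in the preserve list should show up un-decomposed in the exported graph.
assert torch.ops.aten.reciprocal.default in targets
assert torch.ops.aten.linalg_vector_norm.default in targets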
6 changes: 4 additions & 2 deletions examples/apple/coreml/llama/llama_transformer.py
@@ -134,8 +134,10 @@ def _norm(self, x):
         # We have yet to do large scale evaluations on the numeric stability of this solution, but note that
         # it appears better than what exists currently (removing FP32 casts and using FP16)
         rms_norm_eps0 = (
-            x * torch.sqrt(torch.tensor(self.dim, dtype=x.dtype))
-        ) / torch.linalg.vector_norm(x, dim=-1, keepdim=True)
+            x
+            * torch.sqrt(torch.tensor(self.dim, dtype=x.dtype))
+            * torch.reciprocal(torch.linalg.vector_norm(x, dim=-1, keepdim=True))
+        )
         return rms_norm_eps0

def forward(self, x):
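As a quick numeric sanity check of the rewrite above (a sketch added here for illustration, not code from the PR), multiplying by the reciprocal of the vector norm should match the original division up to floating-point rounding:

import torch

dim = 64
x = torch.randn(2, 8, dim)  # float32 here for portability; the exported model uses fp16

scale = torch.sqrt(torch.tensor(dim, dtype=x.dtype))

# Original formulation: scale x by sqrt(dim), then divide by the L2 norm.
old = (x * scale) / torch.linalg.vector_norm(x, dim=-1, keepdim=True)

# Rewritten formulation: the same math expressed with an explicit reciprocal,
# so the traced graph contains aten.reciprocal.default instead of a division.
new = x * scale * torch.reciprocal(torch.linalg.vector_norm(x, dim=-1, keepdim=True))

print(torch.max(torch.abs(old - new)))  # expected to be ~0 (within rounding error)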