add ci job for mps #4076


Open: cccclai wants to merge 32 commits into pytorch:main from cccclai:mps_stories_ci

Commits (32)

dda5cc6  add ci job for mps (cccclai, Jun 27, 2024)
8730476  add stories model mps test (cccclai, Jun 27, 2024)
c39c04e  Update trunk.yml to remove some invalid chars (huydhn, Jun 27, 2024)
a987d76  fix yaml syntax (cccclai, Jun 27, 2024)
9d3985a  Merge branch 'mps_stories_ci' of https://github.com/cccclai/executorc… (cccclai, Jun 27, 2024)
03eb910  install mps (cccclai, Jun 27, 2024)
02c0246  debug mps error (cccclai, Jun 28, 2024)
e48cd15  debug mps error (cccclai, Jun 28, 2024)
f72b008  selectively install mps (cccclai, Jun 28, 2024)
66d5a07  add buck mode back (cccclai, Jun 28, 2024)
56482f1  stop buck mode earlier (cccclai, Jun 28, 2024)
eb3a6c3  fix buck2 mps (cccclai, Jun 28, 2024)
bf36f30  add ci job for mps (cccclai, Jun 27, 2024)
bf2984c  add stories model mps test (cccclai, Jun 27, 2024)
0b44abd  fix yaml syntax (cccclai, Jun 27, 2024)
8785b28  install mps (cccclai, Jun 27, 2024)
ea781ae  debug mps error (cccclai, Jun 28, 2024)
fdebd5a  debug mps error (cccclai, Jun 28, 2024)
5405b5d  selectively install mps (cccclai, Jun 28, 2024)
e3ed646  add buck mode back (cccclai, Jun 28, 2024)
be9cb0e  stop buck mode earlier (cccclai, Jun 28, 2024)
afb49d8  fix buck2 mps (cccclai, Jun 28, 2024)
fa58c6f  rebase (cccclai, Jun 28, 2024)
042286f  rebase (cccclai, Jun 28, 2024)
81d043d  Merge branch 'pytorch:main' into mps_stories_ci (cccclai, Jun 28, 2024)
0d93e14  rebase with mps change (cccclai, Jun 29, 2024)
2d2608a  Merge remote-tracking branch 'origin/mps_stories_ci' into mps_stories_ci (cccclai, Jun 29, 2024)
9d4c556  fix mps backend (cccclai, Jun 30, 2024)
cff91e5  fix dynamic shape input (cccclai, Jun 30, 2024)
ff69c5c  add kv cache file (cccclai, Jun 30, 2024)
c19eb38  fix xnnpack (cccclai, Jun 30, 2024)
954eaaf  more fix (cccclai, Jul 1, 2024)

Files changed

12 changes: 12 additions & 0 deletions .ci/scripts/test_llama.sh
@@ -55,6 +55,14 @@ else
  QE=OFF
fi

if [[ "${MODE}" =~ .*mps.* ]]; then
  MPS=ON
else
  MPS=OFF
fi

echo "MPS option ${MPS}"

if [[ -z "${BUCK:-}" ]]; then
  BUCK=buck2
fi
@@ -77,6 +85,7 @@ cmake_install_executorch_libraries() {
        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
        -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
        -DEXECUTORCH_BUILD_MPS="$MPS" \
        -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
        -Bcmake-out .
    cmake --build cmake-out -j9 --target install --config Debug
@@ -142,6 +151,9 @@ fi
if [[ "${QE}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} --embedding-quantize 8,1024"
fi
if [[ "${MPS}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} -kv -v --mps --disable_dynamic_shape"
fi
# Add dynamically linked library location
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
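
For a quick local repro of what the MPS mode exports, here is a minimal Python sketch. Only the -kv -v --mps --disable_dynamic_shape flags are taken from the hunk above; the checkpoint/params paths and the -c/-p flag names are assumptions for illustration.

# Hedged sketch: invoke the llama2 exporter with the MPS flags this script adds.
import subprocess
import sys

export_args = [
    "-c", "stories110M.pt",     # hypothetical checkpoint path (assumption)
    "-p", "params.json",        # hypothetical params path (assumption)
    "-kv",                      # export with a KV cache
    "-v",                       # verbose export
    "--mps",                    # delegate to the MPS backend
    "--disable_dynamic_shape",  # MPS mode exports with static shapes
]

subprocess.run(
    [sys.executable, "-m", "examples.models.llama2.export_llama", *export_args],
    check=True,  # surface a non-zero exit code, as the CI shell script would
)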
205 changes: 19 additions & 186 deletions .github/workflows/trunk.yml
@@ -35,197 +35,14 @@ jobs:

        PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --target-os macos --event "${GITHUB_EVENT_NAME}"

  test-models-macos:
    name: test-models-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    needs: gather-models
    strategy:
      matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
      fail-fast: false
    with:
      runner: ${{ matrix.runner }}
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: ${{ matrix.timeout }}
      script: |
        MODEL_NAME=${{ matrix.model }}
        BUILD_TOOL=${{ matrix.build-tool }}
        BACKEND=${{ matrix.backend }}
        DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }}

        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # Build and test executorch
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"

  test-custom-ops-macos:
    name: test-custom-ops-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: cmake
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        BUILD_TOOL=${{ matrix.build-tool }}

        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # Build and test custom ops
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"

  test-selective-build-macos:
    name: test-selective-build-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: cmake
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        BUILD_TOOL=${{ matrix.build-tool }}

        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # Build and test selective build
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"

  test-demo-backend-delegation:
    name: test-demo-backend-delegation
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: buck2
          - build-tool: cmake
      fail-fast: false
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"

        BUILD_TOOL=${{ matrix.build-tool }}
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
        # Test selective build
        PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"

  test-arm-backend-delegation:
    name: test-arm-backend-delegation
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-arm-sdk
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"

        source .ci/scripts/utils.sh
        install_flatc_from_source
        install_executorch

        install_arm

        # Increase number of files user can monitor to bypass buck failures.
        # Hopefully this is high enough for this setup.
        sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024

        # Test ethos-u delegate examples with run.sh
        PYTHON_EXECUTABLE=python bash examples/arm/run.sh examples/arm/ethos-u-scratch/

  test-arm-reference-delegation:
    name: test-arm-reference-delegation
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-arm-sdk
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"

        source .ci/scripts/utils.sh
        install_flatc_from_source
        install_executorch

        install_arm

        # Run arm unit tests
        pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test

  test-coreml-delegate:
    name: test-coreml-delegate
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      runner: macos-13-xlarge
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        BUILD_TOOL=cmake

        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # Build and test coreml delegate
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh

  test-pybind-build-macos:
    name: test-pybind-build-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: cmake
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 180
      script: |
        bash .ci/scripts/setup-conda.sh

        # build module for executorch.extension.pybindings.portable_lib
        BUILD_TOOL=${{ matrix.build-tool }}
        EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

        # see if we can import the module successfully
        ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"

  test-llama-runner-macos:
    name: test-llama-runner-mac
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        dtype: [fp32]
        build-tool: [buck2, cmake]
        mode: [portable, xnnpack+kv+custom]
        build-tool: [cmake]
        mode: [mps]
      fail-fast: false
    with:
      runner: macos-m1-stable
@@ -234,15 +51,31 @@
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 900
      script: |
        bash .ci/scripts/setup-conda.sh

        DTYPE=${{ matrix.dtype }}
        BUILD_TOOL=${{ matrix.build-tool }}
        MODE=${{ matrix.mode }}

        if [[ "${BUILD_TOOL}" == "buck2" ]]; then
          # TODO: Will add more modes that don't support buck2
          if [[ "${MODE}" == "mps" ]]; then
            echo "mps doesn't support buck2."
            exit 0
          fi
        fi

        bash .ci/scripts/setup-conda.sh

        # Setup executorch
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

        if [[ "${MODE}" == "mps" ]]; then
          PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
          echo "Finished installing mps."
        else
          echo "Not mps mode, skipping mps install."
        fi

        # Install requirements for export_llama
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
        # Test llama2
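
To make the resulting job set concrete, here is a small illustrative Python sketch of the matrix this change leaves behind, with the buck2 guard from the script above mirrored as a skip; the matrix values come from the diff, the rest is illustration.

# Illustrative: enumerate the effective test-llama-runner-macos matrix.
from itertools import product

dtypes = ["fp32"]
build_tools = ["cmake"]  # buck2 was dropped from the matrix in this change
modes = ["mps"]          # portable and xnnpack+kv+custom were dropped

for dtype, tool, mode in product(dtypes, build_tools, modes):
    if tool == "buck2" and mode == "mps":
        continue  # mirrors the script's early `exit 0`: mps doesn't support buck2
    print(f"test_llama: dtype={dtype} build-tool={tool} mode={mode}")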
4 changes: 2 additions & 2 deletions examples/models/llama2/builder.py
@@ -90,7 +90,7 @@ def load_llama_model(
        checkpoint or checkpoint_dir
    ) and params_path, "Both checkpoint/checkpoint_dir and params can't be empty"
    logging.info(
        f"Loading model with checkpoint={checkpoint}, params={params_path}, use_kv_cache={use_kv_cache}, weight_type={weight_type}"
        f"Loading model with checkpoint={checkpoint}, params={params_path}, use_kv_cache={use_kv_cache}, weight_type={weight_type}, enable_dynamic_shape={enable_dynamic_shape}"
    )
    model, example_inputs, _ = EagerModelFactory.create_model(
        "llama2",
@@ -228,7 +228,7 @@ def _get_dynamic_shape(self) -> Any:
            if self.enable_dynamic_shape:
                return ({1: dim}, {0: dim})
            else:
                None
                return None
        else:
            return ({1: dim},)

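The second hunk fixes a silent fall-through: the bare None was an expression statement, so the method relied on Python's implicit None return. A self-contained sketch of the corrected control flow follows; the tensor meanings and the max bound are assumptions, not taken from the PR.

# Hedged sketch of the corrected _get_dynamic_shape logic. `dim` marks the
# token-sequence dimension as dynamic; the bound and input meanings are assumed.
from typing import Any

from torch.export import Dim

dim = Dim("token_dim", max=128)  # hypothetical upper bound on sequence length

def get_dynamic_shape(use_kv_cache: bool, enable_dynamic_shape: bool) -> Any:
    if use_kv_cache:
        if enable_dynamic_shape:
            # (tokens, input_pos): dim 1 of tokens and dim 0 of input_pos vary.
            return ({1: dim}, {0: dim})
        # Explicit return: the old bare `None` statement did nothing and the
        # function fell through to Python's implicit None return.
        return None
    # Without a KV cache, only the token tensor's sequence length is dynamic.
    return ({1: dim},)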
4 changes: 4 additions & 0 deletions examples/models/llama2/export_llama_lib.py
@@ -36,6 +36,9 @@
    get_quant_embedding_transform,
    get_quant_weight_transform,
)
from .source_transformation.kv_cache import (
    replace_kv_cache_with_dynamic_kv_cache,
)
from .source_transformation.rope import materialze_broadcast_of_rope_freq_cis
from .source_transformation.sdpa import (
    replace_causal_mask,
@@ -359,6 +362,7 @@ def _prepare_for_llama_export(modelname: str, args) -> LlamaEdgeManager:
        transforms.append(materialze_broadcast_of_rope_freq_cis)

    if args.use_sdpa_with_kv_cache:
        # transforms.append(replace_kv_cache_with_dynamic_kv_cache)
        transforms.append(replace_sdpa_with_custom_op)

    if args.use_kv_cache:
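The new import wires in replace_kv_cache_with_dynamic_kv_cache, though the call itself stays commented out above. Source transformations in this file are module-to-module rewrites appended to a transforms list; the following self-contained Python sketch shows that general shape with toy stand-in cache classes, not the PR's actual implementation.

# Toy sketch of a llama2-style source transformation: walk the module tree and
# swap one cache implementation for another. All classes here are stand-ins.
import torch
import torch.nn as nn

class StaticKVCache(nn.Module):
    def __init__(self, max_len: int = 128, head_dim: int = 8):
        super().__init__()
        self.register_buffer("k_cache", torch.zeros(max_len, head_dim))
        # forward() omitted: only the module structure matters for this sketch

class DynamicKVCache(nn.Module):
    def __init__(self, src: StaticKVCache):
        super().__init__()
        self.k_cache = src.k_cache.clone()  # carry over the cached contents

def replace_kv_cache_sketch(module: nn.Module) -> nn.Module:
    for name, child in module.named_children():
        if isinstance(child, StaticKVCache):
            setattr(module, name, DynamicKVCache(child))
        else:
            replace_kv_cache_sketch(child)  # recurse into nested submodules
    return module

# Usage mirrors the transforms list built in _prepare_for_llama_export:
transforms = [replace_kv_cache_sketch]
model = nn.Sequential(nn.Linear(8, 8), StaticKVCache())
for transform in transforms:
    model = transform(model)
assert isinstance(model[1], DynamicKVCache)  # the cache module was swapped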