add ci job for mps #4076


Open: cccclai wants to merge 32 commits into pytorch:main from cccclai:mps_stories_ci

Commits (32)

dda5cc6  add ci job for mps (cccclai, Jun 27, 2024)
8730476  add stories model mps test (cccclai, Jun 27, 2024)
c39c04e  Update trunk.yml to remove some invalid chars (huydhn, Jun 27, 2024)
a987d76  fix yaml syntax (cccclai, Jun 27, 2024)
9d3985a  Merge branch 'mps_stories_ci' of https://github.com/cccclai/executorc… (cccclai, Jun 27, 2024)
03eb910  install mps (cccclai, Jun 27, 2024)
02c0246  debug mps error (cccclai, Jun 28, 2024)
e48cd15  debug mps error (cccclai, Jun 28, 2024)
f72b008  selectively install mps (cccclai, Jun 28, 2024)
66d5a07  add buck mode back (cccclai, Jun 28, 2024)
56482f1  stop buck mode earlier (cccclai, Jun 28, 2024)
eb3a6c3  fix buck2 mps (cccclai, Jun 28, 2024)
bf36f30  add ci job for mps (cccclai, Jun 27, 2024)
bf2984c  add stories model mps test (cccclai, Jun 27, 2024)
0b44abd  fix yaml syntax (cccclai, Jun 27, 2024)
8785b28  install mps (cccclai, Jun 27, 2024)
ea781ae  debug mps error (cccclai, Jun 28, 2024)
fdebd5a  debug mps error (cccclai, Jun 28, 2024)
5405b5d  selectively install mps (cccclai, Jun 28, 2024)
e3ed646  add buck mode back (cccclai, Jun 28, 2024)
be9cb0e  stop buck mode earlier (cccclai, Jun 28, 2024)
afb49d8  fix buck2 mps (cccclai, Jun 28, 2024)
fa58c6f  rebase (cccclai, Jun 28, 2024)
042286f  rebase (cccclai, Jun 28, 2024)
81d043d  Merge branch 'pytorch:main' into mps_stories_ci (cccclai, Jun 28, 2024)
0d93e14  rebase with mps change (cccclai, Jun 29, 2024)
2d2608a  Merge remote-tracking branch 'origin/mps_stories_ci' into mps_stories_ci (cccclai, Jun 29, 2024)
9d4c556  fix mps backend (cccclai, Jun 30, 2024)
cff91e5  fix dynamic shape input (cccclai, Jun 30, 2024)
ff69c5c  add kv cache file (cccclai, Jun 30, 2024)
c19eb38  fix xnnpack (cccclai, Jun 30, 2024)
954eaaf  more fix (cccclai, Jul 1, 2024)

Files changed

12 changes: 12 additions & 0 deletions .ci/scripts/test_llama.sh
@@ -55,6 +55,14 @@ else
  QE=OFF
fi

if [[ "${MODE}" =~ .*mps.* ]]; then
  MPS=ON
else
  MPS=OFF
fi

echo "MPS option ${MPS}"

if [[ -z "${BUCK:-}" ]]; then
  BUCK=buck2
fi
@@ -77,6 +85,7 @@ cmake_install_executorch_libraries() {
        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
        -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
        -DEXECUTORCH_BUILD_MPS="$MPS" \
        -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
        -Bcmake-out .
    cmake --build cmake-out -j9 --target install --config Debug
@@ -142,6 +151,9 @@ fi
if [[ "${QE}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} --embedding-quantize 8,1024"
fi
if [[ "${MPS}" == "ON" ]]; then
  EXPORT_ARGS="${EXPORT_ARGS} -kv -v --mps --disable_dynamic_shape"
fi
# Add dynamically linked library location
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
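
For a quick local repro of what the MPS mode exports, here is a minimal Python sketch. Only the -kv -v --mps --disable_dynamic_shape flags are taken from the hunk above; the checkpoint/params paths and the -c/-p flag names are assumptions for illustration.

# Hedged sketch: invoke the llama2 exporter with the MPS flags this script adds.
import subprocess
import sys

export_args = [
    "-c", "stories110M.pt",     # hypothetical checkpoint path (assumption)
    "-p", "params.json",        # hypothetical params path (assumption)
    "-kv",                      # export with a KV cache
    "-v",                       # verbose export
    "--mps",                    # delegate to the MPS backend
    "--disable_dynamic_shape",  # MPS mode exports with static shapes
]

subprocess.run(
    [sys.executable, "-m", "examples.models.llama2.export_llama", *export_args],
    check=True,  # surface a non-zero exit code, as the CI shell script would
)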
205 changes: 19 additions & 186 deletions .github/workflows/trunk.yml
@@ -35,197 +35,14 @@ jobs:

        PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --target-os macos --event "${GITHUB_EVENT_NAME}"

  test-models-macos:
    name: test-models-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    needs: gather-models
    strategy:
      matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
      fail-fast: false
    with:
      runner: ${{ matrix.runner }}
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: ${{ matrix.timeout }}
      script: |
        MODEL_NAME=${{ matrix.model }}
        BUILD_TOOL=${{ matrix.build-tool }}
        BACKEND=${{ matrix.backend }}
        DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }}

        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # Build and test executorch
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"

  test-custom-ops-macos:
    name: test-custom-ops-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: cmake
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        BUILD_TOOL=${{ matrix.build-tool }}

        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # Build and test custom ops
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"

  test-selective-build-macos:
    name: test-selective-build-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: cmake
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        BUILD_TOOL=${{ matrix.build-tool }}

        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # Build and test selective build
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"

  test-demo-backend-delegation:
    name: test-demo-backend-delegation
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: buck2
          - build-tool: cmake
      fail-fast: false
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"

        BUILD_TOOL=${{ matrix.build-tool }}
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
        # Test selective build
        PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"

  test-arm-backend-delegation:
    name: test-arm-backend-delegation
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-arm-sdk
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"

        source .ci/scripts/utils.sh
        install_flatc_from_source
        install_executorch

        install_arm

        # Increase number of files user can monitor to bypass buck failures.
        # Hopefully this is high enough for this setup.
        sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024

        # Test ethos-u delegate examples with run.sh
        PYTHON_EXECUTABLE=python bash examples/arm/run.sh examples/arm/ethos-u-scratch/

  test-arm-reference-delegation:
    name: test-arm-reference-delegation
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-arm-sdk
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"

        source .ci/scripts/utils.sh
        install_flatc_from_source
        install_executorch

        install_arm

        # Run arm unit tests
        pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test

  test-coreml-delegate:
    name: test-coreml-delegate
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      runner: macos-13-xlarge
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        BUILD_TOOL=cmake

        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # Build and test coreml delegate
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh

  test-pybind-build-macos:
    name: test-pybind-build-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: cmake
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 180
      script: |
        bash .ci/scripts/setup-conda.sh

        # build module for executorch.extension.pybindings.portable_lib
        BUILD_TOOL=${{ matrix.build-tool }}
        EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

        # see if we can import the module successfully
        ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"

  test-llama-runner-macos:
    name: test-llama-runner-mac
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        dtype: [fp32]
        build-tool: [buck2, cmake]
        mode: [portable, xnnpack+kv+custom]
        build-tool: [cmake]
        mode: [mps]
      fail-fast: false
    with:
      runner: macos-m1-stable
@@ -234,15 +51,31 @@
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 900
      script: |
        bash .ci/scripts/setup-conda.sh

        DTYPE=${{ matrix.dtype }}
        BUILD_TOOL=${{ matrix.build-tool }}
        MODE=${{ matrix.mode }}

        if [[ "${BUILD_TOOL}" == "buck2" ]]; then
          # TODO: Will add more modes that don't support buck2
          if [[ "${MODE}" == "mps" ]]; then
            echo "mps doesn't support buck2."
            exit 0
          fi
        fi

        bash .ci/scripts/setup-conda.sh

        # Setup executorch
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

        if [[ "${MODE}" == "mps" ]]; then
          PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
          echo "Finished installing mps."
        else
          echo "Not mps mode, skipping mps install."
        fi

        # Install requirements for export_llama
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
        # Test llama2
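
To make the resulting job set concrete, here is a small illustrative Python sketch of the matrix this change leaves behind, with the buck2 guard from the script above mirrored as a skip; the matrix values come from the diff, the rest is illustration.

# Illustrative: enumerate the effective test-llama-runner-macos matrix.
from itertools import product

dtypes = ["fp32"]
build_tools = ["cmake"]  # buck2 was dropped from the matrix in this change
modes = ["mps"]          # portable and xnnpack+kv+custom were dropped

for dtype, tool, mode in product(dtypes, build_tools, modes):
    if tool == "buck2" and mode == "mps":
        continue  # mirrors the script's early `exit 0`: mps doesn't support buck2
    print(f"test_llama: dtype={dtype} build-tool={tool} mode={mode}")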
4 changes: 2 additions & 2 deletions examples/models/llama2/builder.py
@@ -90,7 +90,7 @@ def load_llama_model(
        checkpoint or checkpoint_dir
    ) and params_path, "Both checkpoint/checkpoint_dir and params can't be empty"
    logging.info(
        f"Loading model with checkpoint={checkpoint}, params={params_path}, use_kv_cache={use_kv_cache}, weight_type={weight_type}"
        f"Loading model with checkpoint={checkpoint}, params={params_path}, use_kv_cache={use_kv_cache}, weight_type={weight_type}, enable_dynamic_shape={enable_dynamic_shape}"
    )
    model, example_inputs, _ = EagerModelFactory.create_model(
        "llama2",
@@ -228,7 +228,7 @@ def _get_dynamic_shape(self) -> Any:
            if self.enable_dynamic_shape:
                return ({1: dim}, {0: dim})
            else:
                None
                return None
        else:
            return ({1: dim},)

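The second hunk fixes a silent fall-through: the bare None was an expression statement, so the method relied on Python's implicit None return. A self-contained sketch of the corrected control flow follows; the tensor meanings and the max bound are assumptions, not taken from the PR.

# Hedged sketch of the corrected _get_dynamic_shape logic. `dim` marks the
# token-sequence dimension as dynamic; the bound and input meanings are assumed.
from typing import Any

from torch.export import Dim

dim = Dim("token_dim", max=128)  # hypothetical upper bound on sequence length

def get_dynamic_shape(use_kv_cache: bool, enable_dynamic_shape: bool) -> Any:
    if use_kv_cache:
        if enable_dynamic_shape:
            # (tokens, input_pos): dim 1 of tokens and dim 0 of input_pos vary.
            return ({1: dim}, {0: dim})
        # Explicit return: the old bare `None` statement did nothing and the
        # function fell through to Python's implicit None return.
        return None
    # Without a KV cache, only the token tensor's sequence length is dynamic.
    return ({1: dim},)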
4 changes: 4 additions & 0 deletions examples/models/llama2/export_llama_lib.py
@@ -36,6 +36,9 @@
    get_quant_embedding_transform,
    get_quant_weight_transform,
)
from .source_transformation.kv_cache import (
    replace_kv_cache_with_dynamic_kv_cache,
)
from .source_transformation.rope import materialze_broadcast_of_rope_freq_cis
from .source_transformation.sdpa import (
    replace_causal_mask,
@@ -359,6 +362,7 @@ def _prepare_for_llama_export(modelname: str, args) -> LlamaEdgeManager:
        transforms.append(materialze_broadcast_of_rope_freq_cis)

    if args.use_sdpa_with_kv_cache:
        # transforms.append(replace_kv_cache_with_dynamic_kv_cache)
        transforms.append(replace_sdpa_with_custom_op)

    if args.use_kv_cache:
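The new import wires in replace_kv_cache_with_dynamic_kv_cache, though the call itself stays commented out above. Source transformations in this file are module-to-module rewrites appended to a transforms list; the following self-contained Python sketch shows that general shape with toy stand-in cache classes, not the PR's actual implementation.

# Toy sketch of a llama2-style source transformation: walk the module tree and
# swap one cache implementation for another. All classes here are stand-ins.
import torch
import torch.nn as nn

class StaticKVCache(nn.Module):
    def __init__(self, max_len: int = 128, head_dim: int = 8):
        super().__init__()
        self.register_buffer("k_cache", torch.zeros(max_len, head_dim))
        # forward() omitted: only the module structure matters for this sketch

class DynamicKVCache(nn.Module):
    def __init__(self, src: StaticKVCache):
        super().__init__()
        self.k_cache = src.k_cache.clone()  # carry over the cached contents

def replace_kv_cache_sketch(module: nn.Module) -> nn.Module:
    for name, child in module.named_children():
        if isinstance(child, StaticKVCache):
            setattr(module, name, DynamicKVCache(child))
        else:
            replace_kv_cache_sketch(child)  # recurse into nested submodules
    return module

# Usage mirrors the transforms list built in _prepare_for_llama_export:
transforms = [replace_kv_cache_sketch]
model = nn.Sequential(nn.Linear(8, 8), StaticKVCache())
for transform in transforms:
    model = transform(model)
assert isinstance(model[1], DynamicKVCache)  # the cache module was swapped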