
Commit 206cde5

Merge branch 'main' into arm-passes-init
2 parents db3e38d + 05277dd commit 206cde5


78 files changed: +2435 -1302 lines changed


.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-08434df1f2f88c9770e59246caa2ff9c6f613270
+295f2ed4d103017f7e19a7b8263ece606cd629db

.ci/docker/common/install_android.sh

Lines changed: 1 addition & 0 deletions
@@ -70,6 +70,7 @@ install_sdk() {
   # These are the tools needed to build Android apps
   yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "platforms;android-34"
   yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "build-tools;33.0.1"
+  yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "build-tools;35.0.0"
   # And some more tools for future emulator tests
   yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "platform-tools"
   yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "tools"

.ci/docker/conda-env-ci.txt

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-cmake=3.22.1
+cmake=3.26.4
 ninja=1.10.2
 libuv
 llvm-openmp
Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
+#!/bin/bash
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+which "${PYTHON_EXECUTABLE}"
+
+# Update tokenizers submodule
+pushd $EXECUTORCH_ROOT/extension/llm/tokenizers
+echo "Update tokenizers submodule"
+git submodule update --init
+popd
+
+# Install ET with CMake
+cmake -DPYTHON_EXECUTABLE=python \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DEXECUTORCH_ENABLE_LOGGING=1 \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=OFF \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -Bcmake-out .
+cmake --build cmake-out -j16 --target install --config Release
+
+# Install llama runner with torchao
+cmake -DPYTHON_EXECUTABLE=python \
+    -DCMAKE_PREFIX_PATH=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())') \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=OFF \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_TORCHAO=ON \
+    -Bcmake-out/examples/models/llama \
+    examples/models/llama
+cmake --build cmake-out/examples/models/llama -j16 --config Release
+
+# Download stories llama110m artifacts
+download_stories_model_artifacts
+
+echo "Creating tokenizer.bin"
+$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
+
+# Export model
+LLAMA_CHECKPOINT=stories110M.pt
+LLAMA_PARAMS=params.json
+MODEL_OUT=model.pte
+TOKENIZER=tokenizer.bin
+
+# Set low-bit quantization parameters
+QLINEAR_BITWIDTH=3 # Can be 1-8
+QLINEAR_GROUP_SIZE=128 # Must be multiple of 16
+QEMBEDDING_BITWIDTH=4 # Can be 1-8
+QEMBEDDING_GROUP_SIZE=32 # Must be multiple of 16
+
+${PYTHON_EXECUTABLE} -m examples.models.llama.export_llama \
+    --checkpoint "${LLAMA_CHECKPOINT:?}" \
+    --params "${LLAMA_PARAMS:?}" \
+    -kv \
+    --use_sdpa_with_kv_cache \
+    --output_name=${MODEL_OUT} \
+    -qmode "torchao:8da${QLINEAR_BITWIDTH}w" \
+    --group_size ${QLINEAR_GROUP_SIZE} \
+    -E "torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}" \
+    --disable_dynamic_shape \
+    -d fp32
+
+# Test run
+./cmake-out/examples/models/llama/llama_main --model_path=$MODEL_OUT --tokenizer_path=$TOKENIZER --prompt="Once upon a time,"
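The four quantization variables above are the script's only tunables. As a side note, a small Python sketch (illustrative only, not part of the commit; the helper name is made up) of the constraints stated in the script's comments and of how the values are spliced into the -qmode, --group_size and -E arguments handed to export_llama:

# Hypothetical helper, for illustration only: mirrors the script's comments
# (bitwidths 1-8, group sizes a multiple of 16) and rebuilds the same argument
# strings the script passes to examples.models.llama.export_llama.
def torchao_lowbit_args(
    qlinear_bitwidth: int = 3,
    qlinear_group_size: int = 128,
    qembedding_bitwidth: int = 4,
    qembedding_group_size: int = 32,
) -> dict[str, str]:
    for bits in (qlinear_bitwidth, qembedding_bitwidth):
        if not 1 <= bits <= 8:
            raise ValueError(f"bitwidth must be in 1-8, got {bits}")
    for group_size in (qlinear_group_size, qembedding_group_size):
        if group_size % 16 != 0:
            raise ValueError(f"group size must be a multiple of 16, got {group_size}")
    return {
        "-qmode": f"torchao:8da{qlinear_bitwidth}w",
        "--group_size": str(qlinear_group_size),
        "-E": f"torchao:{qembedding_bitwidth},{qembedding_group_size}",
    }

# With the script's defaults this reproduces:
# {'-qmode': 'torchao:8da3w', '--group_size': '128', '-E': 'torchao:4,32'}
print(torchao_lowbit_args())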

.github/workflows/android-perf.yml

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@ name: android-perf
 
 on:
   schedule:
-    - cron: 0 0 * * *
+    - cron: 0 0,8,16 * * *
   pull_request:
     paths:
       - .github/workflows/android-perf.yml

.github/workflows/trunk.yml

Lines changed: 22 additions & 2 deletions
@@ -23,8 +23,8 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     strategy:
       matrix:
-        # Mac runners are expensive and limited, and non reliable.
-        # Do some basic testing for macos jobs, and rely mostly on
+        # Mac runners are expensive and limited, and non reliable.
+        # Do some basic testing for macos jobs, and rely mostly on
         # test-models-linux-aarch64 job instead.
         model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l]
         backend: [xnnpack-quantization-delegation]
@@ -288,6 +288,26 @@ jobs:
       # Test ANE llama
       ${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh
 
+  test-llama-torchao-lowbit:
+    name: test-llama-torchao-lowbit
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    with:
+      runner: macos-m1-stable
+      python-version: '3.11'
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+        bash .ci/scripts/setup-conda.sh
+        eval "$(conda shell.bash hook)"
+
+        # Install requirements
+        ${CONDA_RUN} python install_executorch.py
+        ${CONDA_RUN} sh examples/models/llama/install_requirements.sh
+
+        # Run test
+        ${CONDA_RUN} sh .ci/scripts/test_llama_torchao_lowbit.sh
+
   test-llama-runner-linux:
     # Test Both linux x86 and linux aarch64
     name: test-llama-runner-linux

backends/arm/operator_support/convolution_support.py

Lines changed: 17 additions & 0 deletions
@@ -34,6 +34,9 @@ def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
 
         for pad in output_padding:
             if pad != 0:
+                self.reporter.report_reject(
+                    node, "Convolutions with non-zero output padding not implemented."
+                )
                 return False
 
         # Hardware specific constraints
@@ -56,19 +59,33 @@ def _is_node_supported_u55(self, node: fx.Node):
             # Depthwise convolution
             for dim in shape_in[1:]:
                 if not 1 <= dim <= 65536:
+                    self.reporter.report_reject(
+                        node,
+                        f"Depthwise convolution must have CWH <= 65536, got {dim})",
+                    )
                     return False
         else:
            # Convolution
            if not 1 <= C_in <= 65536:
+                self.reporter.report_reject(
+                    node, f"Convolution must have C <= 65536, got {C_in})"
+                )
                return False
 
         kernel_w = kernel[2]
         kernel_h = kernel[3] if len(kernel) > 3 else 1
         # Kernel condition misses constraint on sum of absolute weights
         if not 1 <= kernel_h <= 64 or not 1 <= kernel_w * kernel_h <= 4096:
+            self.reporter.report_reject(
+                node,
+                f"Convolution needs to have kernel_y<=64, kernel_x*kernel_y<=4096, got kernel ({kernel_w}, {kernel_h})",
+            )
             return False
 
         if not self._stride_condition(node):
+            self.reporter.report_reject(
+                node, "Failed condition on stride, pad and dilation combination."
+            )
             return False
 
         return True
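For a concrete feel of the Ethos-U55 kernel bound the new reject message describes (kernel_y <= 64 and kernel_x * kernel_y <= 4096), a standalone sketch, illustrative only and not the backend's API:

# Illustrative re-statement of the U55 kernel condition from the hunk above;
# kernel_w/kernel_h follow the diff's naming (kernel_h is the "kernel_y" in
# the reject message).
def u55_conv_kernel_ok(kernel_w: int, kernel_h: int) -> bool:
    return 1 <= kernel_h <= 64 and 1 <= kernel_w * kernel_h <= 4096

print(u55_conv_kernel_ok(3, 3))     # True: a 3x3 kernel is fine
print(u55_conv_kernel_ok(1, 65))    # False: kernel_y exceeds 64
print(u55_conv_kernel_ok(128, 64))  # False: 128 * 64 = 8192 > 4096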

backends/arm/operator_support/pool_2d_support.py

Lines changed: 42 additions & 2 deletions
@@ -54,12 +54,35 @@ def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
         if len(node.args) > 3:
             # Padding case
             if not all(1 <= k <= 8 for k in kernel):
+                self.reporter.report_reject(
+                    node, f"Avgpool2d with padding needs kernel dims < 8, got {kernel}"
+                )
                 return False
         else:
             if not kernel_check(kernel):
+                self.reporter.report_reject(
+                    node,
+                    f"Avgpool2d needs kernel_y < 256, kernel_x*kernel_y<=65536, got {kernel}",
+                )
                 return False
 
-        return dim_check(shape) and shape[0] == 1 and stride_check(stride)
+        if not dim_check(shape):
+            self.reporter.report_reject(
+                node,
+                f"Avgpool2d needs N == 1, rest dims <= 65536, got shape {list(shape)}",
+            )
+            return False
+        if not stride_check(stride):
+            self.reporter.report_reject(
+                node, f"Avgpool2d needs stride <= 3, got {stride}"
+            )
+            return False
+        if not shape[0] == 1:
+            self.reporter.report_reject(
+                node, f"Avgpool2d needs N==1, got N=={shape[0]}"
+            )
+            return False
+        return True
 
 
 @register_tosa_support_check
@@ -82,4 +105,21 @@ def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
         kernel = cast(tuple[int, int], node.args[1])
         stride = cast(tuple[int, int], node.args[2])
 
-        return kernel_check(kernel) and dim_check(shape) and stride_check(stride)
+        if not kernel_check(kernel):
+            self.reporter.report_reject(
+                node,
+                f"Maxpool2d needs kernel_y < 256, kernel_x*kernel_y<=65536, got {kernel}",
+            )
+            return False
+        if not dim_check(shape):
+            self.reporter.report_reject(
+                node,
+                f"Maxpool2d needs N == 1, rest dims <= 65536, got shape {list(shape)}",
+            )
+            return False
+        if not stride_check(stride):
+            self.reporter.report_reject(
+                node, f"Maxpool2d needs stride <= 3, got {stride}"
+            )
+            return False
+        return True
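Both pooling checks follow the same pattern: the old single-line boolean return is unbundled so every failed constraint is reported before returning False. A compact sketch of the limits named in the reject messages (kernel_y < 256, kernel_x*kernel_y <= 65536, N == 1, remaining dims <= 65536, stride <= 3); the tuple ordering here is an assumption, not taken from the backend:

# Illustrative only: not the backend's kernel_check/dim_check/stride_check.
def pool2d_limits_ok(shape, kernel, stride):
    kernel_y, kernel_x = kernel          # assumed (H, W) ordering
    if not (kernel_y < 256 and kernel_x * kernel_y <= 65536):
        return False, f"kernel out of range: {kernel}"
    if not (shape[0] == 1 and all(d <= 65536 for d in shape[1:])):
        return False, f"bad shape: {list(shape)}"
    if not all(s <= 3 for s in stride):
        return False, f"stride too large: {stride}"
    return True, ""

print(pool2d_limits_ok((1, 16, 56, 56), (3, 3), (2, 2)))  # (True, '')
print(pool2d_limits_ok((2, 16, 56, 56), (3, 3), (2, 2)))  # rejected: N != 1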

backends/arm/operator_support/reduce_sum_support.py

Lines changed: 5 additions & 0 deletions
@@ -34,6 +34,9 @@ def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
 
         for dim in dim_list:
             if not 1 <= input_shape[dim] <= 65536:
+                self.reporter.report_reject(
+                    node, f"sum needs dims < 65536, got shape {input_shape}"
+                )
                 return False
 
         # We can't be certain of which dim is the last in memory yet,
@@ -45,7 +48,9 @@ def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
             for length in input_shape[dim + 1 :]:
                 post_R_product *= length
             if not 1 <= pre_R_product <= 65536:
+                self.reporter.report_reject(node, "Failed dim check")
                 return False
             if not 1 <= post_R_product <= 65536:
+                self.reporter.report_reject(node, "Failed dim check")
                 return False
         return True
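The second hunk bounds the product of the dimensions on either side of each reduced dim. A worked example (illustrative; the pre_R_product loop sits outside the hunk and is assumed symmetric to the post_R_product loop shown):

import math

# Illustrative only: the products before and after the reduced dim must each
# stay within 1..65536, mirroring the checks above.
def sum_dim_ok(input_shape, dim):
    pre_r = math.prod(input_shape[:dim])
    post_r = math.prod(input_shape[dim + 1:])
    return 1 <= pre_r <= 65536 and 1 <= post_r <= 65536

print(sum_dim_ok((1, 64, 128, 128), 1))  # True:  128 * 128 = 16384
print(sum_dim_ok((1, 4, 512, 512), 1))   # False: 512 * 512 = 262144 > 65536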

backends/arm/operator_support/to_copy_support.py

Lines changed: 13 additions & 13 deletions
@@ -75,9 +75,6 @@ def is_node_tosa_supported(
     ) -> bool:
         assert node.target in self.targets
 
-        if tosa_spec not in self.tosa_specs:
-            return False
-
         assert tosa_spec.support_integer()
         supported_dtypes = (
             self.ALL_SUPPORTED_TYPES
@@ -97,30 +94,32 @@ def is_node_tosa_supported(
         assert isinstance(input_val, torch._subclasses.FakeTensor)
         input_dtype = input_val.dtype
         if input_dtype not in supported_dtypes:
-            logger.info(
-                f"Input dtype {input_val.dtype} is not supported in "
-                f"{node.target.name()}."  # type: ignore[union-attr]  # pyre-ignore[16]
+            self.reporter.report_reject(
+                node,
+                f"Input dtype {input_val.dtype} is not supported in {node.target}.",
             )
             return False
 
         # Check output type
         output_val = node.meta["val"]
         assert isinstance(output_val, torch._subclasses.FakeTensor)
         if output_val.dtype not in supported_dtypes[input_dtype]:
-            logger.info(
+            self.reporter.report_reject(
+                node,
                 f"Output dtype {output_val.dtype} is not supported in "
-                f"{node.target.name()} for input dtype {input_dtype}. "  # type: ignore[union-attr]  # pyre-ignore[16]
+                f"{node.target} for input dtype {input_dtype}. "
                 f"Supported output types: "
-                f"{''.join(str(t) for t in supported_dtypes[input_dtype])}"
+                f"{''.join(str(t) for t in supported_dtypes[input_dtype])}",
             )
             return False
 
         # Check memory format (to_copy)
         if "memory_format" in node.kwargs:
             if node.kwargs["memory_format"] in (torch.preserve_format,):
-                logger.info(
+                self.reporter.report_reject(
+                    node,
                     f"Argument 'memory_format' is not supported for "
-                    f"{node.target.name()} right now."  # type: ignore[union-attr]  # pyre-ignore[16]
+                    f"{node.target} right now.",
                 )
                 return False
 
@@ -129,9 +128,10 @@ def is_node_tosa_supported(
             dim_order = node.kwargs["dim_order"]
             # pyre-ignore[6]
             if dim_order != list(range(len(dim_order))):  # type: ignore[arg-type]
-                logger.info(
+                self.reporter.report_reject(
+                    node,
                     f"Argument {dim_order=} is not supported for "
-                    f"{node.target.name()} right now."  # type: ignore[union-attr]  # pyre-ignore[16]
+                    f"{node.target} right now.",
                 )
                 return False
 
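Beyond the swap from logger.info to self.reporter.report_reject, the dtype gate itself is unchanged: the output dtype must appear in the set keyed by the input dtype. A toy sketch of that lookup (the table below is invented for illustration and is not the backend's ALL_SUPPORTED_TYPES):

import torch

# Invented example table; the real mapping lives in the support-check class.
SUPPORTED_CASTS = {
    torch.int8: {torch.int32, torch.float32},
    torch.float32: {torch.int32},
}

def cast_ok(input_dtype, output_dtype) -> bool:
    # Same shape of check as above: reject when the output dtype is not listed
    # for the given input dtype.
    return output_dtype in SUPPORTED_CASTS.get(input_dtype, set())

print(cast_ok(torch.int8, torch.float32))     # True under this toy table
print(cast_ok(torch.float32, torch.float64))  # False -> would be report_reject'ed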
