Skip to content

Commit 43cec8e

Browse files
authored
Merge branch 'main' into sigmoid_flaky_fix
2 parents bd0bd59 + e1738cc commit 43cec8e

File tree

79 files changed

+2586
-1049
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+2586
-1049
lines changed

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ set_up_aot() {
3939
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
4040
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
4141
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
42-
-DPYTHON_EXECUTABLE=python3 \
43-
-DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF
42+
-DPYTHON_EXECUTABLE=python3
4443
cmake --build $PWD --target "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j$(nproc)
4544
# install Python APIs to correct import path
4645
# The filename might vary depending on your Python and host version.

.ci/scripts/test_model.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ test_model_with_qnn() {
201201
# TODO(guangyang): Make QNN chipset match the target device
202202
QNN_CHIPSET=SM8450
203203

204-
"${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS
204+
"${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS
205205
EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
206206
}
207207

.github/workflows/trunk.yml

Lines changed: 68 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -555,11 +555,11 @@ jobs:
555555
strategy:
556556
matrix:
557557
hf_model_id: [
558-
google/gemma-2-2b,
559-
Qwen/Qwen2.5-0.5B,
558+
google/gemma-3-1b-it,
559+
Qwen/Qwen3-0.6B,
560560
HuggingFaceTB/SmolLM2-135M,
561561
meta-llama/Llama-3.2-1B,
562-
allenai/OLMo-1B-hf
562+
allenai/OLMo-1B-hf,
563563
]
564564
fail-fast: false
565565
with:
@@ -569,44 +569,102 @@ jobs:
569569
submodules: 'recursive'
570570
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
571571
timeout: 90
572+
upload-artifact: profiling-artifacts-${{ strategy.job-index }}
572573
script: |
573574
echo "::group::Set up ExecuTorch"
574575
# The generic Linux job uses the base env, not the one set up by the image
575576
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
576577
conda activate "${CONDA_ENV}"
577578
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
579+
# Build executor_runner with ETDump enabled
580+
PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
581+
-DCMAKE_INSTALL_PREFIX=cmake-out \
582+
-DEXECUTORCH_ENABLE_LOGGING=1 \
583+
-DCMAKE_BUILD_TYPE=Release \
584+
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
585+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
586+
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
587+
-DEXECUTORCH_BUILD_XNNPACK=ON \
588+
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
589+
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
590+
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
591+
-DEXECUTORCH_BUILD_DEVTOOLS=ON \
592+
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
593+
-Bcmake-out .
594+
cmake --build cmake-out -j16 --target install --config Release
578595
echo "::endgroup::"
579596
580597
echo "::group::Set up Hugging Face"
581598
pip install -U "huggingface_hub[cli]"
582599
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
583600
git clone https://github.com/huggingface/optimum-executorch
584-
cd optimum-executorch
601+
pushd optimum-executorch
585602
# There is no release yet; for CI stability, always test from the same commit on main
586-
git checkout 577a2b19670e4c643a5c6ecb09bf47b9a699e7c6
603+
git checkout da80c9e35b3db5c7eea8731b7d660482fb4870a8
587604
pip install .[tests]
605+
popd
606+
607+
if [ "${{ matrix.hf_model_id }}" == "google/gemma-3-1b-it" ]; then
608+
# Fixes for gemma-3 are not available in the released version
609+
git clone https://github.com/huggingface/transformers.git
610+
pushd transformers
611+
git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f
612+
pip install -e .
613+
popd
614+
fi
588615
pip list
589616
echo "::endgroup::"
590617
591-
echo "::group::Export and Run ${{ matrix.hf_model_id }}"
618+
echo "::group::Export to ExecuTorch"
592619
# Pass matrix variable as environment variable
593620
export MODEL_ID="${{ matrix.hf_model_id }}"
621+
export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_8da4w"
622+
pushd optimum-executorch
623+
624+
optimum-cli export executorch \
625+
--model ${MODEL_ID} \
626+
--task text-generation \
627+
--recipe xnnpack \
628+
--use_custom_sdpa \
629+
--output_dir ${OUTPUT_DIR} \
630+
--qlinear
631+
632+
ls -FlAGhp ${OUTPUT_DIR}
633+
popd
634+
echo "::endgroup::"
635+
636+
echo "::group::Inference using python API"
637+
pushd optimum-executorch
594638
python -c "
595639
import os
596640
from optimum.executorch import ExecuTorchModelForCausalLM
597641
from transformers import AutoTokenizer
598642
599643
model_id = os.getenv('MODEL_ID')
600-
print(f'Loading model: {model_id}')
601-
model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
602-
tokenizer = AutoTokenizer.from_pretrained(model_id)
644+
pte_dir = os.getenv('OUTPUT_DIR')
645+
print(f'Loading model {model_id} from {pte_dir}.')
646+
model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)
603647
generated_text = model.text_generation(
604-
tokenizer=tokenizer,
648+
tokenizer=AutoTokenizer.from_pretrained(model_id),
605649
prompt='Simply put, the theory of relativity states that',
606650
max_seq_len=64
607651
)
608652
print(generated_text)
609653
"
654+
popd
655+
echo "::endgroup::"
656+
657+
echo "::group::Inference using executor_runner with ETDump"
658+
./cmake-out/executor_runner \
659+
--model_path ${OUTPUT_DIR}/model.pte \
660+
--etdump_path ${OUTPUT_DIR}/etdump.etdp
661+
662+
export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv
663+
mkdir -p $(dirname "$TSV_PATH")
664+
python3 -m devtools.inspector.inspector_cli \
665+
--etdump_path ${OUTPUT_DIR}/etdump.etdp \
666+
--tsv_path ${TSV_PATH}
667+
610668
echo "::endgroup::"
611669
612670

CMakeLists.txt

Lines changed: 16 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,21 @@ project(executorch)
4949

5050
include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake)
5151

52+
if(NOT CMAKE_CXX_STANDARD)
53+
set(CMAKE_CXX_STANDARD 17)
54+
endif()
55+
announce_configured_options(CMAKE_CXX_STANDARD)
56+
57+
if(NOT CMAKE_BUILD_TYPE)
58+
set(CMAKE_BUILD_TYPE Debug)
59+
endif()
60+
announce_configured_options(CMAKE_BUILD_TYPE)
61+
62+
announce_configured_options(CMAKE_CXX_COMPILER_ID)
63+
announce_configured_options(CMAKE_TOOLCHAIN_FILE)
64+
announce_configured_options(BUCK2)
65+
announce_configured_options(PYTHON_EXECUTABLE)
66+
5267
load_build_preset()
5368
include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake)
5469

@@ -63,14 +78,6 @@ include(ExternalProject)
6378

6479
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
6580

66-
if(NOT CMAKE_CXX_STANDARD)
67-
set(CMAKE_CXX_STANDARD 17)
68-
endif()
69-
70-
if(NOT CMAKE_BUILD_TYPE)
71-
set(CMAKE_BUILD_TYPE Debug)
72-
endif()
73-
7481
# Setup RPATH.
7582
# See https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
7683
# Use separate rpaths during build and install phases
@@ -128,22 +135,6 @@ else()
128135
set(CMAKE_CXX_FLAGS_RELEASE "-O2 ${CMAKE_CXX_FLAGS_RELEASE}")
129136
endif()
130137

131-
#
132-
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
133-
#
134-
cmake_dependent_option(
135-
EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
136-
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
137-
)
138-
139-
#
140-
# cpuinfo: build cpuinfo library. Disable on unsupported platforms
141-
#
142-
cmake_dependent_option(
143-
EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
144-
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
145-
)
146-
147138
add_subdirectory(third-party)
148139

149140
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
@@ -475,14 +466,6 @@ install(
475466
)
476467
install(FILES tools/cmake/executorch-config.cmake DESTINATION lib/cmake/ExecuTorch)
477468

478-
#
479-
# executor_runner: Host tool that demonstrates program execution.
480-
#
481-
cmake_dependent_option(
482-
EXECUTORCH_BUILD_EXECUTOR_RUNNER "Build the executor_runner executable" ON
483-
"NOT CMAKE_TOOLCHAIN_IOS" OFF
484-
)
485-
486469
# Add googletest if any test targets should be built
487470
if(BUILD_TESTING)
488471
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/googletest)
@@ -571,9 +554,7 @@ if(EXECUTORCH_BUILD_EXTENSION_TENSOR)
571554
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/tensor)
572555
endif()
573556

574-
if(EXECUTORCH_BUILD_PTHREADPOOL
575-
AND EXECUTORCH_BUILD_CPUINFO
576-
)
557+
if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO)
577558
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
578559
endif()
579560

@@ -738,6 +719,3 @@ if(EXECUTORCH_BUILD_VULKAN)
738719
endif()
739720

740721
include(Test.cmake)
741-
742-
# Print all summary
743-
executorch_print_configuration_summary()

CODEOWNERS

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,6 @@
1515
/backends/vulkan @SS-JIA
1616
/backends/xnnpack @digantdesai @mcr229
1717

18-
/build @GregoryComer @kirklandsign
19-
20-
/codegen @larryliu0820 @lucylq
21-
2218
/devtools @tarun292 @Gasoonjia
2319

2420
/docs @mergennachin
@@ -41,7 +37,6 @@
4137
/exir/backend @cccclai @kimishpatel @JacobSzwejbka @tarun292
4238
/exir @JacobSzwejbka @tarun292 @larryliu0820
4339

44-
4540
/extension/android @kirklandsign
4641
/extension/android_test @kirklandsign
4742
/extension/apple @shoumikhin
@@ -83,3 +78,11 @@
8378
/test @larryliu0820 @kirklandsign
8479

8580
/util @tarun292
81+
82+
# Build System -----------------------------------------------------------------
83+
84+
CMakeLists.txt @jathu @larryliu0820 @kirklandsign
85+
CMakePresets.json @jathu @larryliu0820 @kirklandsign
86+
87+
/codegen @larryliu0820 @lucylq
88+
/tools/cmake @jathu @larryliu0820 @kirklandsign

backends/arm/_passes/TARGETS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ python_library(
77
"//executorch/backends/arm:tosa_quant_utils",
88
"//executorch/backends/arm:tosa_utils",
99
"//executorch/backends/transforms:fuse_view_copy",
10+
"//executorch/backends/transforms:remove_getitem_op",
1011
"//executorch/backends/transforms:replace_scalar_with_tensor",
1112
"//executorch/backends/xnnpack/_passes:xnnpack_passes",
1213
"//executorch/exir:lib",

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from .decompose_softmax_pass import DecomposeSoftmaxPass # noqa
3333
from .decompose_softmax_unstable_pass import DecomposeSoftmaxUnstablePass # noqa
3434
from .decompose_sqrt_pass import DecomposeSqrtPass # noqa
35+
from .decompose_sum_pass import DecomposeSumPass # noqa
3536
from .decompose_var_pass import DecomposeVarPass # noqa
3637
from .fold_qdq_with_annotated_qparams_pass import ( # noqa
3738
FoldAndAnnotateQParamsPass,
@@ -44,10 +45,8 @@
4445
from .fuse_quantized_activation_pass import FuseQuantizedActivationPass # noqa
4546
from .insert_rescales_pass import InsertRescalePass # noqa
4647
from .insert_table_ops import InsertTableOpsPass # noqa
47-
from .keep_dims_false_to_squeeze_pass import KeepDimsFalseToSqueezePass # noqa
4848
from .match_arg_ranks_pass import MatchArgRanksPass # noqa
4949
from .match_where_self_arg_dtype_pass import MatchWhereSelfDtypePass # noqa
50-
from .meandim_to_averagepool_pass import ConvertMeanDimToAveragePoolPass # noqa
5150
from .mm_to_bmm_pass import ConvertMmToBmmPass # noqa
5251
from .remove_clone_pass import RemoveClonePass # noqa
5352
from .replace_scalar_with_tensor_pass import ( # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
ConvertAnyDefaultDimDimsPass,
1818
ConvertExpandCopyToRepeatPass,
1919
ConvertFullLikeToFullPass,
20-
ConvertMeanDimToAveragePoolPass,
2120
ConvertMinMaxPass,
2221
ConvertMmToBmmPass,
2322
ConvertSplitToSlicePass,
@@ -37,6 +36,7 @@
3736
DecomposeSoftmaxPass,
3837
DecomposeSoftmaxUnstablePass,
3938
DecomposeSqrtPass,
39+
DecomposeSumPass,
4040
DecomposeVarPass,
4141
FoldAndAnnotateQParamsPass,
4242
FuseBatchnorm2DPass,
@@ -45,7 +45,6 @@
4545
FuseQuantizedActivationPass,
4646
InsertRescalePass,
4747
InsertTableOpsPass,
48-
KeepDimsFalseToSqueezePass,
4948
MatchArgRanksPass,
5049
MatchWhereSelfDtypePass,
5150
QuantizeOperatorArguments,
@@ -60,7 +59,7 @@
6059
UnsqueezeScalarPlaceholdersPass,
6160
)
6261

63-
from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification
62+
from executorch.backends.arm.tosa_specification import TosaSpecification
6463
from executorch.backends.transforms.decompose_sdpa import (
6564
DecomposeScaledDotProductAttention,
6665
)
@@ -87,13 +86,13 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
8786
self.add_pass(ConvertSplitToSlicePass())
8887
self.add_pass(ConvertMmToBmmPass())
8988
self.add_pass(DecomposeLinearPass())
90-
self.add_pass(ConvertMeanDimToAveragePoolPass())
89+
self.add_pass(DecomposeMeanDimPass())
9190
self.add_pass(ConvertFullLikeToFullPass())
9291
self.add_pass(ConvertToClampPass())
9392
self.add_pass(ConvertMinMaxPass())
9493
self.add_pass(ConvertAnyDefaultDimDimsPass())
9594
self.add_pass(MatchWhereSelfDtypePass())
96-
if isinstance(self.tosa_spec, Tosa_0_80) and self.tosa_spec.is_U55_subset:
95+
if self.tosa_spec.is_U55_subset:
9796
self.add_pass(CastToInt32Pass())
9897

9998
self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
@@ -110,7 +109,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
110109
self.add_pass(ConvertExpandCopyToRepeatPass())
111110
self.add_pass(UnsqueezeBeforeRepeatPass())
112111
self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
113-
self.add_pass(KeepDimsFalseToSqueezePass())
112+
self.add_pass(DecomposeSumPass())
114113
self.add_pass(Conv1dUnsqueezePass(exported_program))
115114
self.add_pass(DecomposeSelectPass())
116115
self.add_pass(ConvertSqueezesToViewPass())
@@ -140,7 +139,6 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
140139
self.add_pass(DecomposeVarPass())
141140
self.add_pass(DecomposeMeanDimPass())
142141
self.add_pass(DecomposeNotEqualPass())
143-
self.add_pass(ConvertMeanDimToAveragePoolPass())
144142
self.add_pass(DecomposeDivPass())
145143
self.add_pass(DecomposeSoftmaxPass())
146144
self.add_pass(DecomposeGeluPass())
@@ -163,7 +161,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
163161
self.add_pass(ConvertExpandCopyToRepeatPass())
164162
self.add_pass(UnsqueezeBeforeRepeatPass())
165163
self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
166-
self.add_pass(KeepDimsFalseToSqueezePass())
164+
self.add_pass(DecomposeSumPass())
167165
self.add_pass(Conv1dUnsqueezePass(exported_program))
168166
self.add_pass(DecomposeSelectPass())
169167
self.add_pass(ConvertSqueezesToViewPass())
@@ -212,12 +210,14 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
212210
self.add_pass(DecomposeSqrtPass())
213211
self.add_pass(DecomposeSiluPass())
214212

215-
if isinstance(self.tosa_spec, Tosa_0_80) and self.tosa_spec.is_U55_subset:
213+
if self.tosa_spec.is_U55_subset:
216214
# Numerically stable softmax uses amax which is not supported on Ethos-U55
217215
self.add_pass(DecomposeSoftmaxUnstablePass())
218216
else:
219217
self.add_pass(DecomposeSoftmaxPass())
220218

221219
self.add_pass(ConvertMinMaxPass())
222220
self.add_pass(ReplaceInfValues())
221+
self.add_pass(DecomposeSumPass())
222+
223223
return self._transform(graph_module)

0 commit comments

Comments
 (0)