Skip to content

Commit dfa286b

Browse files
Qualcomm AI Engine Direct - Support Qnn IR backend in online preparation
- Support the QNN IR backend; replace QCir with DLC in the online-prepare flow; fix SDK version checking; add a config for the Saver backend; block online preparation if the QNN version is below 2.30; change the QNN version from 2.28 to 2.31 in CI to align with QNN IR.
1 parent 73740e9 commit dfa286b

35 files changed

+667
-196
lines changed

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ set -o xtrace
1212
build_qnn_backend() {
1313
echo "Start building qnn backend."
1414
export ANDROID_NDK_ROOT=/opt/ndk
15-
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
15+
export QNN_SDK_ROOT=/tmp/qnn/2.31.0.250130
1616
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
1717

1818
# Workaround to avoid issues around missing flatccrt library (depending on the

.ci/scripts/setup-qnn-deps.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ install_qnn() {
1616
QNN_INSTALLATION_DIR=/tmp/qnn
1717
mkdir -p "${QNN_INSTALLATION_DIR}"
1818

19-
curl -Lo /tmp/v2.28.0.24.10.29.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.28.0.241029.zip"
19+
curl -Lo /tmp/v2.31.0.25.01.30.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.31.0.250130.zip"
2020
echo "Finishing downloading qnn sdk."
21-
unzip -qo /tmp/v2.28.0.24.10.29.zip -d /tmp
21+
unzip -qo /tmp/v2.31.0.25.01.30.zip -d /tmp
2222
echo "Finishing unzip qnn sdk."
2323

2424

.ci/scripts/test_llama.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ echo "COREML option ${COREML}"
123123
if [[ "${MODE}" =~ .*qnn.* ]]; then
124124
QNN=ON
125125
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
126-
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
126+
export QNN_SDK_ROOT=/tmp/qnn/2.31.0.250130
127127
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
128128
export PYTHONPATH=".."
129129
cp schema/program.fbs exir/_serialize/program.fbs

.ci/scripts/test_qnn_static_llama.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ set -exu
1010
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1111

1212
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
13-
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
13+
export QNN_SDK_ROOT=/tmp/qnn/2.31.0.250130
1414
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
1515
export PYTHONPATH=".."
1616
cp schema/program.fbs exir/_serialize/program.fbs

.github/workflows/android-perf.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ jobs:
216216
--output_name="${OUT_ET_MODEL_NAME}.pte"
217217
ls -lh "${OUT_ET_MODEL_NAME}.pte"
218218
elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
219-
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
219+
export QNN_SDK_ROOT=/tmp/qnn/2.31.0.250130
220220
export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
221221
export PYTHONPATH=$(pwd)/..
222222

backends/qualcomm/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ endif()
7373

7474
include_directories(
7575
BEFORE ${_common_include_directories} ${QNN_SDK_ROOT}/include/QNN
76+
${QNN_SDK_ROOT}/share/QNN/converter/jni
7677
${EXECUTORCH_SOURCE_DIR}/third-party/flatbuffers/include
7778
${EXECUTORCH_SOURCE_DIR}/runtime/core/portable_type/c10
7879
)
@@ -176,6 +177,7 @@ target_link_libraries(
176177
qnn_factory PRIVATE qnn_schema qnn_backend qnn_device qnn_context qnn_graph
177178
qnn_mem_manager qnn_custom_protocol
178179
)
180+
179181
target_link_libraries(
180182
qnn_manager PRIVATE qnn_factory wrappers qnn_schema utils shared_buffer
181183
)

backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
3232
.def(py::init<const py::bytes&, const py::bytes&>())
3333
.def(py::init<const py::bytes&, const py::list&>())
3434
.def("Init", &PyQnnManager::Init)
35+
.def("GetQnnAPIVersion", &PyQnnManager::GetQnnAPIVersion)
3536
.def("IsNodeSupportedByBackend", &PyQnnManager::IsNodeSupportedByBackend)
3637
.def("Compile", py::overload_cast<>(&PyQnnManager::Compile))
3738
.def(

backends/qualcomm/aot/python/PyQnnManagerAdaptor.h

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <pybind11/stl.h>
1919
#include <memory>
2020
#include <string_view>
21+
#include "QnnTypes.h"
2122

2223
namespace py = pybind11;
2324
namespace executorch {
@@ -34,7 +35,7 @@ class PyQnnManager {
3435
auto qnn_executorch_options = GetQnnExecuTorchOptions(
3536
qnn_executorch_option_ptr_.cast<std::string_view>().data());
3637
qnn_manager_ = std::make_shared<QnnManager>(
37-
qnn_executorch_options, qnn_executorch_context_binary_);
38+
qnn_executorch_options, qnn_executorch_context_binary_, false);
3839
}
3940

4041
// used for loading context binary directly
@@ -47,7 +48,7 @@ class PyQnnManager {
4748
qnn_executorch_context_binary_.buffer = info.ptr;
4849
qnn_executorch_context_binary_.nbytes = info.size * info.itemsize;
4950
qnn_manager_ = std::make_shared<QnnManager>(
50-
qnn_executorch_options, qnn_executorch_context_binary_);
51+
qnn_executorch_options, qnn_executorch_context_binary_, false);
5152
}
5253

5354
// used during stage 2 of multi-graph mode
@@ -160,7 +161,12 @@ class PyQnnManager {
160161
qnn_executorch_context_binary_ =
161162
MakeQcirCustomBinaryInfo(qcir_bin, tensor_data);
162163
qnn_manager_ = std::make_shared<QnnManager>(
163-
qnn_executorch_options, qnn_executorch_context_binary_);
164+
qnn_executorch_options, qnn_executorch_context_binary_, false);
165+
}
166+
167+
std::vector<int> GetQnnAPIVersion() {
168+
return {
169+
QNN_API_VERSION_MAJOR, QNN_API_VERSION_MINOR, QNN_API_VERSION_PATCH};
164170
}
165171

166172
executorch::runtime::Error Init() {
@@ -195,7 +201,7 @@ class PyQnnManager {
195201
std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
196202
QnnExecuTorchContextBinary binary_info;
197203

198-
if (qnn_manager_->IsOnlinePrepare() || qnn_manager_->IsMultipleGraphs()) {
204+
if (qnn_manager_->IsMultipleGraphs()) {
199205
builder_.Reset();
200206
std::vector<uint8_t> tensor_data;
201207
std::vector<uint64_t> offsets;
@@ -305,8 +311,11 @@ class PyQnnManager {
305311
QNN_EXECUTORCH_LOG_ERROR("Fail to compile QNN graph");
306312
return py::array_t<char>(0);
307313
}
308-
if (qnn_manager_->GetContextBinary(binary_info) !=
309-
executorch::runtime::Error::Ok) {
314+
auto qnn_executorch_options = GetQnnExecuTorchOptions(
315+
qnn_executorch_option_ptr_.cast<std::string_view>().data());
316+
if (qnn_executorch_options->saver() ||
317+
qnn_manager_->GetContextBinary(binary_info) !=
318+
executorch::runtime::Error::Ok) {
310319
return py::array_t<char>(0);
311320
}
312321
}

backends/qualcomm/qnn_preprocess.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
from executorch.backends.qualcomm.builders.node_visitor import get_node_visitors
2121
from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader
2222
from executorch.backends.qualcomm.partition.utils import generate_qnn_executorch_option
23+
from executorch.backends.qualcomm.serialization.qc_schema_serialize import (
24+
flatbuffer_to_option,
25+
)
2326
from executorch.exir.backend.backend_details import (
2427
BackendDetails,
2528
CompileSpec,
@@ -43,6 +46,13 @@ def preprocess(
4346
) -> PreprocessResult:
4447
option = generate_qnn_executorch_option(compile_specs)
4548
qnn_manager = PyQnnManager.QnnManager(option)
49+
obj_options = flatbuffer_to_option(option)
50+
51+
core_api_version = qnn_manager.GetQnnAPIVersion()
52+
assert not obj_options.online_prepare or (
53+
core_api_version[0] >= 2 and core_api_version[1] >= 23
54+
), "Online prepare is disabled for Qnn API versions below 2.23.0."
55+
4656
qnn_manager.Init()
4757

4858
# QNN Delegate Specific Passes
@@ -107,6 +117,11 @@ def preprocess(
107117
qnn_manager.GetGraphNames()[0],
108118
[py_op_wrapper.GetOpWrapper() for py_op_wrapper in py_op_wrapper_list],
109119
)
120+
121+
if obj_options.saver:
122+
exit(
123+
f"Records all QNN API calls from saver backend at: {obj_options.saver_output_dir}"
124+
)
110125
assert len(qnn_context_binary) != 0, "Failed to generate Qnn context binary."
111126
qnn_manager.Destroy()
112127
# For now, debug_handle_map is not used by QNN ExecuTorch

backends/qualcomm/runtime/QnnExecuTorchBackend.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
3636
// covert SizedBuffer to qnn ExecuTorch option
3737
QnnExecuTorchContextBinary qnn_context_blob;
3838
const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr;
39-
4039
auto [status, signature, ctx_size, ctx_bin] =
4140
QnnContextCustomProtocol().DeserializeContextCustomBuffer(
4241
const_cast<void*>(processed->data()));
@@ -49,6 +48,7 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
4948
qnn_context_blob.buffer = ctx_bin;
5049
} else {
5150
// This buffer will be verified again in QnnBackendCache.
51+
5252
QNN_EXECUTORCH_LOG_INFO(
5353
"Deserializing processed data using QnnQcirCustomProtocol");
5454
qnn_context_blob.buffer = const_cast<void*>(processed->data());
@@ -71,8 +71,7 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
7171

7272
// NOTE: Since we use placement new and since this type is not trivially
7373
// destructible, we must call the destructor manually in destroy().
74-
new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob);
75-
74+
new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob, true);
7675
// TODO: this is a temporal solution for multi-graph support, will be
7776
// removed once framework starts to accept runtime configuration
7877
// ---
@@ -94,9 +93,9 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
9493

9594
if (qnn_manager->IsOnlinePrepare()) {
9695
ET_CHECK_OR_RETURN_ERROR(
97-
qnn_manager->CompileQcir() == Error::Ok,
96+
qnn_manager->CompileGraphsFromDlc() == Error::Ok,
9897
Internal,
99-
"Fail to compile binary in qcir format");
98+
"Fail to compile binary in Dlc format");
10099
} else {
101100
for (const std::string& graph_name : qnn_manager->GetGraphNames()) {
102101
ET_CHECK_OR_RETURN_ERROR(

0 commit comments

Comments
 (0)