pytorch
diff --git a/‎.ci/scripts/test_llama_torchao_lowbit.sh
Lines changed: 0 additions & 1 deletion b/‎.ci/scripts/test_llama_torchao_lowbit.sh
Lines changed: 0 additions & 1 deletion
diff --git a/‎backends/apple/coreml/CMakeLists.txt
Lines changed: 39 additions & 14 deletions b/‎backends/apple/coreml/CMakeLists.txt
Lines changed: 39 additions & 14 deletions
diff --git a/‎backends/vulkan/vulkan_preprocess.py
Lines changed: 5 additions & 2 deletions b/‎backends/vulkan/vulkan_preprocess.py
Lines changed: 5 additions & 2 deletions
diff --git a/‎examples/apple/coreml/executor_runner/coreml_executor_runner.xcodeproj/project.pbxproj
Lines changed: 16 additions & 8 deletions b/‎examples/apple/coreml/executor_runner/coreml_executor_runner.xcodeproj/project.pbxproj
Lines changed: 16 additions & 8 deletions
diff --git a/‎examples/apple/coreml/scripts/build_executor_runner.sh
Lines changed: 2 additions & 0 deletions b/‎examples/apple/coreml/scripts/build_executor_runner.sh
Lines changed: 2 additions & 0 deletions
diff --git a/‎examples/models/llama/README.md
Lines changed: 1 addition & 2 deletions b/‎examples/models/llama/README.md
Lines changed: 1 addition & 2 deletions
diff --git a/‎examples/models/llama/export_llama_lib.py
Lines changed: 0 additions & 13 deletions b/‎examples/models/llama/export_llama_lib.py
Lines changed: 0 additions & 13 deletions
@@ -78,7 +78,6 @@ ${PYTHON_EXECUTABLE} -m examples.models.llama.export_llama \
     -qmode "torchao:8da${QLINEAR_BITWIDTH}w" \
     --group_size ${QLINEAR_GROUP_SIZE} \
     -E "torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}" \
-    --disable_dynamic_shape \
     -d fp32
 
 # Test run
 
@@ -1,4 +1,9 @@
 # Copyright © 2023 Apple Inc. All rights reserved.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
 
 cmake_minimum_required(VERSION 3.19)
 
@@ -111,32 +116,48 @@ set(PROTOBUF_SOURCES
     runtime/sdk/format/WordTagger.pb.cc
 )
 
+find_library(FOUNDATION_FRAMEWORK Foundation)
+
+# CoreML util: standalone library built from UTIL_SOURCES; exports runtime/util as a PUBLIC include directory (so targets linking it inherit the path) and links Foundation privately.
+add_library(coreml_util ${UTIL_SOURCES})
+target_include_directories(coreml_util PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/runtime/util)
+target_link_libraries(coreml_util PRIVATE ${FOUNDATION_FRAMEWORK})
+
+install(
+  TARGETS coreml_util
+  DESTINATION lib
+  INCLUDES
+  DESTINATION ${_common_include_directories}
+)
+
+# CoreML inmemoryfs: standalone library built from INMEMORYFS_SOURCES; exports runtime/inmemoryfs as a PUBLIC include directory and links coreml_util and Foundation privately.
+add_library(coreml_inmemoryfs ${INMEMORYFS_SOURCES})
+target_include_directories(coreml_inmemoryfs PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/runtime/inmemoryfs)
+target_link_libraries(coreml_inmemoryfs PRIVATE coreml_util ${FOUNDATION_FRAMEWORK})
+
+install(
+  TARGETS coreml_inmemoryfs
+  DESTINATION lib
+  INCLUDES
+  DESTINATION ${_common_include_directories}
+)
+
 # Define the delegate library
 add_library(coremldelegate)
-target_sources(
-  coremldelegate PRIVATE ${INMEMORYFS_SOURCES} ${KVSTORE_SOURCES}
-                         ${DELEGATE_SOURCES} ${UTIL_SOURCES}
-)
+target_sources(coremldelegate PRIVATE ${KVSTORE_SOURCES} ${DELEGATE_SOURCES})
 
 target_include_directories(
   coremldelegate PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/include
 )
 target_include_directories(
   coremldelegate PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/kvstore
 )
-target_include_directories(
-  coremldelegate PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/inmemoryfs
-)
 target_include_directories(
   coremldelegate PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/delegate
 )
-target_include_directories(
-  coremldelegate PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime/util
-)
 target_include_directories(coremldelegate PRIVATE ${EXECUTORCH_ROOT}/..)
 target_include_directories(coremldelegate PRIVATE ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
 target_compile_definitions(coremldelegate PRIVATE C10_USING_CUSTOM_GENERATED_MACROS)
-target_link_libraries(coremldelegate PRIVATE executorch_core)
 
 if(EXECUTORCH_BUILD_DEVTOOLS)
   target_sources(coremldelegate PRIVATE ${SDK_SOURCES} ${PROTOBUF_SOURCES})
@@ -156,13 +177,17 @@ endif()
 
 find_library(ACCELERATE_FRAMEWORK Accelerate)
 find_library(COREML_FRAMEWORK CoreML)
-find_library(FOUNDATION_FRAMEWORK Foundation)
 find_library(SQLITE_LIBRARY sqlite3)
 
 target_link_libraries(
   coremldelegate
-  PRIVATE executorch_core ${ACCELERATE_FRAMEWORK} ${COREML_FRAMEWORK}
-          ${FOUNDATION_FRAMEWORK} ${SQLITE_LIBRARY}
+  PUBLIC  coreml_util
+          coreml_inmemoryfs
+  PRIVATE executorch_core
+          ${ACCELERATE_FRAMEWORK}
+          ${COREML_FRAMEWORK}
+          ${FOUNDATION_FRAMEWORK}
+          ${SQLITE_LIBRARY}
 )
 
 target_link_options_shared_lib(coremldelegate)
 
@@ -47,7 +47,7 @@
 )
 from executorch.exir.backend.utils import DelegateMappingBuilder
 
-from executorch.exir.memory_planning import greedy
+from executorch.exir.memory_planning import greedy, memory_planning_algorithm_suite
 from executorch.exir.pass_base import ExportPass, PassBase
 
 from executorch.exir.passes import MemoryPlanningPass, SpecPropPass
@@ -199,11 +199,14 @@ def preprocess(  # noqa: C901
         # Finally, apply dynamic shape passes and memory planning pass. These passes
         # must be applied only when the graph structure is finalized.
         greedy_memory_planning = partial(greedy, allow_overlapping_allocations=False)
+        mem_planning_suite = partial(
+            memory_planning_algorithm_suite, algo_list=[greedy_memory_planning]
+        )
         program = apply_passes(
             program,
             [
                 ConstraintBasedSymShapeEvalPass(),
-                MemoryPlanningPass(memory_planning_algo=greedy_memory_planning),
+                MemoryPlanningPass(memory_planning_algo=mem_planning_suite),
             ],
         )
 
 
@@ -9,6 +9,8 @@
 /* Begin PBXBuildFile section */
 		38626BB42B225A560059413D /* libflatccrt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 38626BB32B225A560059413D /* libflatccrt.a */; };
 		38626BB52B225A890059413D /* libetdump.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 38626BAF2B21C98F0059413D /* libetdump.a */; };
+		879121DA2D91DDBA001E6C66 /* libcoreml_inmemoryfs.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 879121D82D91DDBA001E6C66 /* libcoreml_inmemoryfs.a */; };
+		879121DB2D91DDBA001E6C66 /* libcoreml_util.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 879121D92D91DDBA001E6C66 /* libcoreml_util.a */; };
 		C94D51592ACF4BFC00AF47FD /* main.mm in Sources */ = {isa = PBXBuildFile; fileRef = C94D51582ACF4BFC00AF47FD /* main.mm */; };
 		C94D515E2ACFCBA000AF47FD /* libexecutorch.a in Frameworks */ = {isa = PBXBuildFile; fileRef = C94D515C2ACFCBA000AF47FD /* libexecutorch.a */; };
 		C94D51622ACFCBBA00AF47FD /* libsqlite3.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = C94D51612ACFCBBA00AF47FD /* libsqlite3.tbd */; };
@@ -36,6 +38,8 @@
 /* Begin PBXFileReference section */
 		38626BAF2B21C98F0059413D /* libetdump.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libetdump.a; path = libraries/libetdump.a; sourceTree = "<group>"; };
 		38626BB32B225A560059413D /* libflatccrt.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libflatccrt.a; path = "../../../../third-party/flatcc/lib/libflatccrt.a"; sourceTree = "<group>"; };
+		879121D82D91DDBA001E6C66 /* libcoreml_inmemoryfs.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libcoreml_inmemoryfs.a; path = "libraries/libcoreml_inmemoryfs.a"; sourceTree = "<group>"; };
+		879121D92D91DDBA001E6C66 /* libcoreml_util.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libcoreml_util.a; path = "libraries/libcoreml_util.a"; sourceTree = "<group>"; };
 		C94D514E2ACF4B9300AF47FD /* coreml_executor_runner */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = coreml_executor_runner; sourceTree = BUILT_PRODUCTS_DIR; };
 		C94D51582ACF4BFC00AF47FD /* main.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = main.mm; sourceTree = "<group>"; };
 		C94D515C2ACFCBA000AF47FD /* libexecutorch.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libexecutorch.a; path = libraries/libexecutorch.a; sourceTree = "<group>"; };
@@ -54,6 +58,8 @@
 			isa = PBXFrameworksBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				879121DA2D91DDBA001E6C66 /* libcoreml_inmemoryfs.a in Frameworks */,
+				879121DB2D91DDBA001E6C66 /* libcoreml_util.a in Frameworks */,
 				38626BB52B225A890059413D /* libetdump.a in Frameworks */,
 				F24817E72BC65B2000E80D98 /* libexecutorch_core.a in Frameworks */,
 				38626BB42B225A560059413D /* libflatccrt.a in Frameworks */,
@@ -91,6 +97,8 @@
 		C94D51602ACFCBBA00AF47FD /* Frameworks */ = {
 			isa = PBXGroup;
 			children = (
+				879121D82D91DDBA001E6C66 /* libcoreml_inmemoryfs.a */,
+				879121D92D91DDBA001E6C66 /* libcoreml_util.a */,
 				C988D69C2B998CD700979CF6 /* libprotobuf-lite.a */,
 				38626BB32B225A560059413D /* libflatccrt.a */,
 				38626BAF2B21C98F0059413D /* libetdump.a */,
@@ -214,7 +222,7 @@
 				GCC_OPTIMIZATION_LEVEL = 0;
 				GCC_PREPROCESSOR_DEFINITIONS = (
 					"DEBUG=1",
-                                	"C10_USING_CUSTOM_GENERATED_MACROS",
+					C10_USING_CUSTOM_GENERATED_MACROS,
 					"$(inherited)",
 				);
 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
@@ -273,7 +281,7 @@
 				GCC_C_LANGUAGE_STANDARD = gnu17;
 				GCC_NO_COMMON_BLOCKS = YES;
 				GCC_PREPROCESSOR_DEFINITIONS = (
-                                	"C10_USING_CUSTOM_GENERATED_MACROS",
+					C10_USING_CUSTOM_GENERATED_MACROS,
 					"$(inherited)",
 				);
 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
@@ -297,9 +305,9 @@
 				DEVELOPMENT_TEAM = "";
 				ENABLE_HARDENED_RUNTIME = YES;
 				HEADER_SEARCH_PATHS = (
-                                	"$(SRCROOT)/include",
-                                	"$(SRCROOT)/include/executorch/runtime/core/portable_type/c10",
-                                );
+					"$(SRCROOT)/include",
+					"$(SRCROOT)/include/executorch/runtime/core/portable_type/c10",
+				);
 				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
 				LIBRARY_SEARCH_PATHS = (
 					"$(SRCROOT)/libraries",
@@ -319,9 +327,9 @@
 				DEVELOPMENT_TEAM = "";
 				ENABLE_HARDENED_RUNTIME = YES;
 				HEADER_SEARCH_PATHS = (
-                                	"$(SRCROOT)/include",
-                                	"$(SRCROOT)/include/executorch/runtime/core/portable_type/c10",
-                                );
+					"$(SRCROOT)/include",
+					"$(SRCROOT)/include/executorch/runtime/core/portable_type/c10",
+				);
 				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
 				LIBRARY_SEARCH_PATHS = (
 					"$(SRCROOT)/libraries",
 
@@ -66,6 +66,8 @@ find "$CMAKE_BUILD_DIR_PATH/" -name 'libexecutorch_core.a' -exec cp -f "{}" "$LI
 find "$CMAKE_BUILD_DIR_PATH/" -name 'libprotobuf-lite.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libprotobuf-lite.a"  \;
 find "$CMAKE_BUILD_DIR_PATH/" -name 'libprotobuf-lited.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libprotobuf-lite.a"  \;
 find "$CMAKE_BUILD_DIR_PATH/" -name 'libetdump.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libetdump.a"  \;
+find "$CMAKE_BUILD_DIR_PATH/" -name 'libcoreml_util.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libcoreml_util.a"  \;
+find "$CMAKE_BUILD_DIR_PATH/" -name 'libcoreml_inmemoryfs.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libcoreml_inmemoryfs.a"  \;
 find "$CMAKE_BUILD_DIR_PATH/" -name 'libcoremldelegate.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libcoremldelegate.a"  \;
 find "$CMAKE_BUILD_DIR_PATH/" -name 'libportable_ops_lib.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libportable_ops_lib.a"  \;
 find "$CMAKE_BUILD_DIR_PATH/" -name 'libportable_kernels.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libportable_kernels.a"  \;
 
@@ -382,7 +382,7 @@ Please refer to [this tutorial](https://pytorch.org/executorch/main/llm/llama-de
 
 ## Running with low-bit kernels
 
-We now give instructions for quantizating and running your model with low-bit kernels.  These are still experimental, and require you do development on an Arm-based Mac.  Also note that low-bit quantization often requires QAT (quantization-aware training) to give good quality results.  Currently dynamic shapes must be disabled when exporting a model with these kernels.
+We now give instructions for quantizing and running your model with low-bit kernels.  These are still experimental, and require you to do development on an Arm-based Mac.  Also note that low-bit quantization often requires QAT (quantization-aware training) to give good quality results.
 
 First export your model for lowbit quantization (step 2 above):
 
@@ -408,7 +408,6 @@ python -m examples.models.llama.export_llama \
   -qmode "torchao:8da${QLINEAR_BITWIDTH}w" \
   --group_size ${QLINEAR_GROUP_SIZE} \
   -E "torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}" \
-  --disable_dynamic_shape \
   -d fp32
 ```
 
 
@@ -723,19 +723,6 @@ def _validate_args(args):
                 "Shared embedding is only supported with torchao quantization."
             )
 
-    if (
-        args.quantization_mode is not None
-        and args.quantization_mode.startswith("torchao:")
-    ) or (
-        args.embedding_quantize is not None
-        and args.embedding_quantize.startswith("torchao:")
-    ):
-        if args.enable_dynamic_shape:
-            raise ValueError(
-                "Dynamic shape is not currently supported with torchao ops. Please use --disable_dynamic_shape."
-                "If you need this feature, please file an issue."
-            )
-
 
 def _to_edge_and_lower_llama_xnnpack(
     builder_exported,