
Commit 7e54dab

larryliu0820 authored and facebook-github-bot committed
Unbreak test models llama CI (#6026)
Summary: Pull Request resolved: #6026

Did a bunch of debugging on OSS CI: https://github.com/pytorch/executorch/actions/runs/11241297226/job/31252590975. I was able to confirm that although the failure surfaces in `ConvertToLinear`, the root cause is that we partition the graph differently between the two PyTorch nightlies dev20240916 and dev20240917. The exported graph looks the same, but the partitioner behaves differently and causes the `ConvertToLinear` pass to error out. We can't simply revert to the dev20240916 nightly, because it breaks other CI jobs; see #5987.

The approach I'm taking here avoids decomposing linear by using the `to_edge_transform_and_lower` API. This avoids jumping into the rabbit hole of debugging the partitioning & tagging logic.

Differential Revision: D64074891
1 parent ed9f50f commit 7e54dab
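For readers who want the shape of the change without parsing the diff below: the script moves from a two-step flow (export to edge, then `to_backend` with the XNNPACK partitioner) to a single `to_edge_transform_and_lower` call that partitions before linear is decomposed. The following is a minimal sketch under stated assumptions; the `torch.nn.Linear` stand-in module and input shape are illustrative only, not one of the example models this script actually loads.

import torch
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower
from executorch.extension.export_util.utils import export_to_edge

# Illustrative stand-in; the real script loads a model from MODEL_NAME_TO_MODEL.
model = torch.nn.Linear(8, 8).eval()
example_inputs = (torch.randn(1, 8),)

ep = torch.export.export_for_training(model, example_inputs)

# Old flow (pre-commit): converting to edge decomposes aten.linear,
# and partitioning only happens afterwards, in to_backend().
edge_old = export_to_edge(
    ep.module(),
    example_inputs,
    edge_compile_config=EdgeCompileConfig(_check_ir_validity=True),
)
edge_old = edge_old.to_backend(XnnpackPartitioner())

# New flow (this commit): partitioning runs as part of lowering, so the
# XNNPACK delegate can claim linear before it is decomposed.
edge_new = to_edge_transform_and_lower(
    ep,
    partitioner=[XnnpackPartitioner()],
    compile_config=EdgeCompileConfig(_check_ir_validity=True),
)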

File tree

1 file changed: +12 −10


examples/xnnpack/aot_compiler.py

+12 −10
@@ -15,7 +15,11 @@
 import torch
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
 from executorch.devtools import generate_etrecord
-from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig
+from executorch.exir import (
+    EdgeCompileConfig,
+    ExecutorchBackendConfig,
+    to_edge_transform_and_lower,
+)
 from executorch.extension.export_util.utils import export_to_edge, save_pte_program

 from ..models import MODEL_NAME_TO_MODEL
@@ -81,29 +85,27 @@

     model = model.eval()
     # pre-autograd export. eventually this will become torch.export
-    model = torch.export.export_for_training(model, example_inputs).module()
+    ep = torch.export.export_for_training(model, example_inputs)
+    model = ep.module()

     if args.quantize:
         logging.info("Quantizing Model...")
         # TODO(T165162973): This pass shall eventually be folded into quantizer
         model = quantize(model, example_inputs)

-    edge = export_to_edge(
-        model,
-        example_inputs,
-        edge_compile_config=EdgeCompileConfig(
+    edge = to_edge_transform_and_lower(
+        ep,
+        partitioner=[XnnpackPartitioner()],
+        compile_config=EdgeCompileConfig(
             _check_ir_validity=False if args.quantize else True,
             _skip_dim_order=True,  # TODO(T182187531): enable dim order in xnnpack
         ),
     )
-    logging.info(f"Exported graph:\n{edge.exported_program().graph}")
+    logging.info(f"Exported and lowered graph:\n{edge.exported_program().graph}")

     # this is needed for the ETRecord as lowering modifies the graph in-place
     edge_copy = copy.deepcopy(edge)

-    edge = edge.to_backend(XnnpackPartitioner())
-    logging.info(f"Lowered graph:\n{edge.exported_program().graph}")
-
     exec_prog = edge.to_executorch(
         config=ExecutorchBackendConfig(extract_delegate_segments=False)
     )
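A note on the design choice, as inferred from the summary above: handing the partitioner to `to_edge_transform_and_lower` lets the XNNPACK delegate claim `aten.linear` before edge lowering decomposes it, so the nightly-sensitive partitioning behavior that made `ConvertToLinear` error out is no longer exercised. That is also why the separate `to_backend(XnnpackPartitioner())` step and its second logging call are deleted rather than patched.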
