from torch._inductor.decomposition import remove_decompositions
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

-from torch.export import export
from torch.export.exported_program import ExportedProgram

from .passes import get_cadence_passes

from .utils import print_ops_info


-def prepare_and_convert_pt2(
+def trace(
    model: torch.nn.Module,
    inputs: tuple[object, ...],
-    quantizer: CadenceQuantizer,
-    calibration_data: Optional[list[tuple[object, ...]]] = None,
    dump_graphs: bool = False,
-) -> torch.fx.GraphModule:
+) -> ExportedProgram:
    """
-    Prepare and convert a model using the given quantizer.
-    The quantizer must be supplied and be the same as the one used to
-    fuse the model later, if applicable. If you do not expect that behavior,
-    please use quantize_and_fuse_pt2 instead, which will instantiate a
-    default quantizer for you if needed.
-    If calibration data is provided, it will be used to calibrate the model. If
-    not, the inputs will be used for calibration instead, which is useful for
-    unit tests but should not be used for end-to-end use cases.
-    Returns a GraphModule with the converted model.
+    Trace the model with export_for_training and return an ExportedProgram.
    """

+    # Make the model inference mode by calling model.eval()
+    model.eval()
+
+    # Prevent mkldnn decompositions
+    torch._C._set_mkldnn_enabled(False)
+
    # Get default decompositions
    decomp_table = torch.export.default_decompositions()
+
    # Select ops to keep
    ops_to_keep = [
        torch.ops.aten.conv1d.default,
@@ -77,19 +73,47 @@ def prepare_and_convert_pt2(
        torch.ops.aten.matmul.default,
        torch.ops.aten.rms_norm.default,
    ]
+
    # Remove decompositions for the ops we want to keep
    # pyre-fixme[6]: For 1st argument expected `Dict[typing.Callable[..., typing.Any
    remove_decompositions(decomp_table, ops_to_keep)
+
    # Export with dynamo
-    model_gm = (
+    ep = (
        torch.export.export_for_training(model, inputs, strict=True)
        .run_decompositions(decomp_table)
-        .module()
    )

    if dump_graphs:
        logging.info("Graph before quantization:")
-        logging.info(model_gm.graph.print_tabular())
+        logging.info(ep.module().graph.print_tabular())
+
+    return ep
+
+
+def prepare_and_convert_pt2(
+    ep: ExportedProgram,
+    inputs: tuple[object, ...],
+    quantizer: CadenceQuantizer,
+    calibration_data: Optional[list[tuple[object, ...]]] = None,
+    dump_graphs: bool = False,
+) -> torch.fx.GraphModule:
+    """
+    Prepare and convert a model using the given quantizer.
+    The quantizer must be supplied and be the same as the one used to
+    fuse the model later, if applicable. If you do not expect that behavior,
+    please use quantize_and_fuse_pt2 instead, which will instantiate a
+    default quantizer for you if needed.
+    If calibration data is provided, it will be used to calibrate the model. If
+    not, the inputs will be used for calibration instead, which is useful for
+    unit tests but should not be used for end-to-end use cases.
+    Returns a GraphModule with the converted model.
+    """
+
+    # Get the graph module from the ExportedProgram
+    model_gm = ep.module()
+
+    assert isinstance(model_gm, torch.fx.GraphModule)

    # Prepare
    prepared_model = prepare_pt2e(model_gm, quantizer)
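
To illustrate the refactor, here is a minimal sketch of how the two new entry points compose. The toy model, example inputs, and import paths are assumptions (executorch's usual module layout), not part of this commit:

    import torch

    from executorch.backends.cadence.aot.compiler import prepare_and_convert_pt2, trace
    from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer

    # Hypothetical toy model, for illustration only.
    class SimpleModel(torch.nn.Module):
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return torch.nn.functional.relu(x)

    model = SimpleModel()
    inputs = (torch.randn(1, 16),)

    # Step 1: trace() now owns eval(), the mkldnn guard, and
    # export_for_training, and returns an ExportedProgram.
    ep = trace(model, inputs)

    # Step 2: prepare/convert consumes the ExportedProgram instead of an nn.Module.
    converted_gm = prepare_and_convert_pt2(ep, inputs, CadenceDefaultQuantizer())
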
@@ -113,10 +137,10 @@ def prepare_and_convert_pt2(
# Note: this is not meant as a primary API since it can create inconsistencies
-# if the quantizer here is different from the quantizer used to convert. It is
-# however useful for unit tests to separate the converted model from the fused
-# model, to be able to get reference numerics.
-# If this does not apply, please use quantize_and_fuse_pt2 instead.
+# if the quantizer here is different from the quantizer used to prepare/convert.
+# It is however useful for unit tests to separate the converted model from the
+# fused model, to be able to get reference numerics.
+# If this does not apply, please use quantize_pt2 instead.
def fuse_pt2(
    converted_graph_module: torch.fx.GraphModule,
    quantizer: CadenceQuantizer,
@@ -151,16 +175,20 @@ def quantize_pt2(
    unit tests but should not be used for end-to-end use cases.
    Returns a GraphModule with the quantized model.
    """
-    # Make the model inference mode by calling model.eval()
-    model.eval()

    # Instantiate the quantizer to CadenceQuantizer if not supplied
    if not quantizer:
        quantizer = CadenceDefaultQuantizer()

+    ep = trace(model, inputs, dump_graphs=dump_graphs)
+
+    if dump_graphs:
+        logging.info("Graph after trace:")
+        logging.info(ep.graph.print_tabular())
+
    # Get converted graph module
    converted_gm = prepare_and_convert_pt2(
-        model, inputs, quantizer, calibration_data, dump_graphs=dump_graphs
+        ep, inputs, quantizer, calibration_data, dump_graphs=dump_graphs
    )

    # Get fused model
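
For reference, quantize_pt2 now composes the pieces above; a sketch of the equivalent manual flow, useful when a test needs the converted model's reference numerics before fusion (placeholder model and inputs as in the earlier example; the same quantizer instance must be used for convert and fuse):

    quantizer = CadenceDefaultQuantizer()

    # What quantize_pt2 does internally: trace, then prepare/convert...
    ep = trace(model, inputs)
    converted_gm = prepare_and_convert_pt2(ep, inputs, quantizer)
    ref_outputs = converted_gm(*inputs)  # reference numerics before fusion

    # ...then fuse with the same quantizer instance.
    fused_gm = fuse_pt2(converted_gm, quantizer)
    fused_outputs = fused_gm(*inputs)  # compare against ref_outputs
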
@@ -173,22 +201,6 @@ def quantize_pt2(
    return fused_gm


-# Export the model and lower it to an ExportedProgram (in aten IR)
-def export_program(
-    model: torch.nn.Module,
-    inputs: tuple[object, ...],
-) -> ExportedProgram:
-    assert isinstance(model, torch.nn.Module), "model should be an nn.Module"
-
-    # Prevent mkldnn decompositions
-    torch._C._set_mkldnn_enabled(False)
-
-    # Export the model and return it.
-    expo_program = export(model, inputs, strict=True)
-
-    return expo_program
-
-
def lower_ep_to_edge(
    expo_program: ExportedProgram,
    dump_graphs: bool = False,
@@ -237,7 +249,7 @@ def export_to_edge(
    assert isinstance(model, torch.nn.Module), "model should be an nn.Module"

    # Export the model into an ExportedProgram.
-    expo_program = export_program(model, inputs)
+    expo_program = trace(model, inputs)

    # Lower the model to edge IR.
    edge_prog_manager = lower_ep_to_edge(expo_program, dump_graphs, constant_methods)
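
Finally, the lowering path: export_to_edge now reuses trace() in place of the deleted export_program(), so the two-step form below is equivalent (a sketch assuming dump_graphs and constant_methods keep their defaults, with the same placeholder model and inputs as above):

    # One call...
    edge_prog_manager = export_to_edge(model, inputs)

    # ...or the same thing spelled out:
    ep = trace(model, inputs)
    edge_prog_manager = lower_ep_to_edge(ep)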