Test new partitioner on all models

mcr229 · web-flow · commit 5c270451aeb2 · 2024-08-07T21:19:34.000-07:00
Differential Revision: D60492337 Pull Request resolved: #4576
diff --git a/backends/xnnpack/partition/config/__init__.py b/backends/xnnpack/partition/config/__init__.py
@@ -55,44 +55,41 @@
 )
 
 ALL_PARTITIONER_CONFIGS: List[Type[XNNPartitionerConfig]] = [
-    # GEMM-like Configs
-    AddmmConfig,
-    LinearConfig,
-    ConstantPadConfig,
-    ConvolutionConfig,
-    # BatchNorm Config
-    BatchNormConfig,
-    # Single Node Configs
     AbsConfig,
-    AvgPoolingConfig,
     AddConfig,
+    AddmmConfig,
+    AvgPoolingConfig,
+    BatchNormConfig,
     CatConfig,
     CeilConfig,
+    ConstantPadConfig,
+    ConvolutionConfig,
     ClampConfig,
     DivConfig,
+    # EluConfig, # Waiting for PyTorch Pin Update
     FloorConfig,
     HardtanhConfig,
     HardswishConfig,
     LeakyReLUConfig,
+    LinearConfig,
     MaxDimConfig,
     MaximumConfig,
     MaxPool2dConfig,
     MeanDimConfig,
     MinimumConfig,
     MulConfig,
     NegConfig,
+    PermuteConfig,
     PowConfig,
     PreluConfig,
-    SoftmaxConfig,
+    ReLUConfig,
     SigmoidConfig,
     SliceCopyConfig,
+    SoftmaxConfig,
     SquareRootConfig,
     SubConfig,
-    PermuteConfig,
-    # EluConfig, # Waiting for PyTorch Pin Update
-    ReLUConfig,
     UpsampleBilinear2dConfig,
-    # Quantization Op Configs
+    # Quant/Dequant Op Configs
     QuantizedPerTensorConfig,
     DeQuantizedPerTensorConfig,
 ]
diff --git a/backends/xnnpack/partition/configs.py b/backends/xnnpack/partition/configs.py
@@ -101,13 +101,16 @@
     exir_ops.edge.aten.addmm.default,  # TODO(T163877189) add constraint for addmm
 ]
 
+# Names of the ops that count as supported quantized ops. Membership in this
+# set is used to decide whether a quantize/dequantize op is implicit or explicit.
 SUPPORTED_IMPLICIT_Q_DQ_OP_NAMES_SET = {
     op.name()
     for op in (
         SUPPORTED_QUANT_OPS
         + [
             exir_ops.edge.aten._to_copy.default,
             exir_ops.edge.aten.linear.default,
+            exir_ops.edge.aten.convolution.default,
         ]
     )
 }
diff --git a/backends/xnnpack/test/models/deeplab_v3.py b/backends/xnnpack/test/models/deeplab_v3.py
@@ -32,8 +32,7 @@ def test_fp32_dl3(self):
         (
             Tester(self.dl3, self.model_inputs)
             .export()
-            .to_edge()
-            .partition()
+            .to_edge_transform_and_lower()
             .to_executorch()
             .serialize()
             .run_method_and_compare_outputs()
diff --git a/backends/xnnpack/test/models/edsr.py b/backends/xnnpack/test/models/edsr.py
@@ -21,8 +21,7 @@ def test_fp32_edsr(self):
         (
             Tester(self.edsr, self.model_inputs)
             .export()
-            .to_edge()
-            .partition()
+            .to_edge_transform_and_lower()
             .to_executorch()
             .serialize()
             .run_method_and_compare_outputs()
@@ -34,8 +33,7 @@ def _test_qs8_edsr(self):
             Tester(self.edsr, self.model_inputs)
             .quantize()
             .export()
-            .to_edge()
-            .partition()
+            .to_edge_transform_and_lower()
             .to_executorch()
             .serialize()
             .run_method_and_compare_outputs()
@@ -47,8 +45,7 @@ def test_qs8_edsr_no_calibrate(self):
             Tester(self.edsr, self.model_inputs)
             .quantize(Quantize(calibrate=False))
             .export()
-            .to_edge()
-            .partition()
+            .to_edge_transform_and_lower()
             .to_executorch()
             .serialize()
             .run_method_and_compare_outputs()
diff --git a/backends/xnnpack/test/models/emformer_rnnt.py b/backends/xnnpack/test/models/emformer_rnnt.py
@@ -38,8 +38,7 @@ def test_fp32_emformer_joiner(self):
         (
             Tester(joiner, joiner.get_example_inputs())
             .export()
-            .to_edge()
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .to_executorch()
             .serialize()
@@ -65,8 +64,7 @@ def _test_fp32_emformer_predictor(self):
         (
             Tester(predictor, predictor.get_example_inputs())
             .export()
-            .to_edge()
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .to_executorch()
             .serialize()
@@ -89,8 +87,7 @@ def test_fp32_emformer_transcriber(self):
         (
             Tester(transcriber, transcriber.get_example_inputs())
             .export()
-            .to_edge()
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .to_executorch()
             .serialize()
diff --git a/backends/xnnpack/test/models/inception_v3.py b/backends/xnnpack/test/models/inception_v3.py
@@ -34,9 +34,7 @@ def test_fp32_ic3(self):
         (
             Tester(self.ic3, self.model_inputs)
             .export()
-            .to_edge()
-            .check(list(self.all_operators))
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .check_not(list(self.all_operators))
             .to_executorch()
@@ -55,9 +53,7 @@ def _test_qs8_ic3(self):
             Tester(self.ic3, self.model_inputs)
             .quantize()
             .export()
-            .to_edge()
-            .check(list(ops_after_quantization))
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .check_not(list(ops_after_quantization))
             .to_executorch()
@@ -76,9 +72,7 @@ def test_qs8_ic3_no_calibration(self):
             Tester(self.ic3, self.model_inputs)
             .quantize(Quantize(calibrate=False))
             .export()
-            .to_edge()
-            .check(list(ops_after_quantization))
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .check_not(list(ops_after_quantization))
             .to_executorch()
diff --git a/backends/xnnpack/test/models/inception_v4.py b/backends/xnnpack/test/models/inception_v4.py
@@ -32,9 +32,7 @@ def test_fp32_ic4(self):
         (
             Tester(self.ic4, self.model_inputs)
             .export()
-            .to_edge()
-            .check(list(self.all_operators))
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .check_not(list(self.all_operators))
             .to_executorch()
@@ -52,9 +50,7 @@ def test_qs8_ic4(self):
             Tester(self.ic4, self.model_inputs)
             .quantize()
             .export()
-            .to_edge()
-            .check(list(ops_after_quantization))
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .check_not(list(ops_after_quantization))
             .to_executorch()
diff --git a/backends/xnnpack/test/models/llama2_et_example.py b/backends/xnnpack/test/models/llama2_et_example.py
@@ -39,10 +39,7 @@ def _test(self, dtype: torch.dtype = torch.float):
         (
             Tester(model, example_inputs)
             .export()
-            .to_edge()
-            .dump_artifact()
-            .partition()
-            .dump_artifact()
+            .to_edge_transform_and_lower()
             .to_executorch()
             .serialize()
             .run_method_and_compare_outputs(atol=5e-2, inputs=example_inputs)
diff --git a/backends/xnnpack/test/models/mobilebert.py b/backends/xnnpack/test/models/mobilebert.py
@@ -7,7 +7,7 @@
 import unittest
 
 import torch
-from executorch.backends.xnnpack.test.tester import Tester
+from executorch.backends.xnnpack.test.tester import Quantize, Tester
 from transformers import MobileBertConfig, MobileBertModel  # @manual
 
 
@@ -32,9 +32,19 @@ def test_fp32_mobilebert(self):
         (
             Tester(self.mobilebert, self.example_inputs)
             .export()
-            .to_edge()
-            .check(list(self.supported_ops))
-            .partition()
+            .to_edge_transform_and_lower()
+            .check_not(list(self.supported_ops))
+            .to_executorch()
+            .serialize()
+            .run_method_and_compare_outputs(inputs=self.example_inputs)
+        )
+
+    def test_qs8_mobilebert(self):
+        (
+            Tester(self.mobilebert, self.example_inputs)
+            .quantize(Quantize(calibrate=False))
+            .export()
+            .to_edge_transform_and_lower()
             .check_not(list(self.supported_ops))
             .to_executorch()
             .serialize()
diff --git a/backends/xnnpack/test/models/mobilenet_v2.py b/backends/xnnpack/test/models/mobilenet_v2.py
@@ -39,9 +39,7 @@ def test_fp32_mv2(self):
         (
             Tester(self.mv2, self.model_inputs, dynamic_shapes=dynamic_shapes)
             .export()
-            .to_edge()
-            .check(list(self.all_operators))
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .check_not(list(self.all_operators))
             .to_executorch()
@@ -67,9 +65,7 @@ def _test_qs8_mv2(self):
             Tester(self.mv2, self.model_inputs, dynamic_shapes=dynamic_shapes)
             .quantize()
             .export()
-            .to_edge()
-            .check(list(ops_after_quantization))
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .check_not(list(ops_after_quantization))
             .to_executorch()
@@ -95,9 +91,7 @@ def test_qs8_mv2_no_calibration(self):
             Tester(self.mv2, self.model_inputs, dynamic_shapes=dynamic_shapes)
             .quantize(Quantize(calibrate=False))
             .export()
-            .to_edge()
-            .check(list(ops_after_quantization))
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .check_not(list(ops_after_quantization))
             .to_executorch()
diff --git a/backends/xnnpack/test/models/mobilenet_v3.py b/backends/xnnpack/test/models/mobilenet_v3.py
@@ -28,7 +28,6 @@ class TestMobileNetV3(unittest.TestCase):
         "executorch_exir_dialects_edge__ops_aten_clamp_default",
         "executorch_exir_dialects_edge__ops_aten_permute_copy_default",
         "executorch_exir_dialects_edge__ops_aten_addmm_default",
-        "executorch_exir_dialects_edge__ops_aten__to_copy_default",
         "executorch_exir_dialects_edge__ops_aten_convolution_default",
         "executorch_exir_dialects_edge__ops_aten_relu_default",
         "executorch_exir_dialects_edge__ops_aten_add_Tensor",
@@ -41,9 +40,7 @@ def test_fp32_mv3(self):
         (
             Tester(self.mv3, self.model_inputs, dynamic_shapes=self.dynamic_shapes)
             .export()
-            .to_edge()
-            .check(list(self.all_operators))
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .check_not(list(self.all_operators))
             .to_executorch()
@@ -53,18 +50,13 @@ def test_fp32_mv3(self):
 
     @unittest.skip("T187799178: Debugging Numerical Issues with Calibration")
     def _test_qs8_mv3(self):
-        ops_after_quantization = self.all_operators - {
-            "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
-        }
         ops_after_lowering = self.all_operators
 
         (
             Tester(self.mv3, self.model_inputs, dynamic_shapes=self.dynamic_shapes)
             .quantize()
             .export()
-            .to_edge()
-            .check(list(ops_after_quantization))
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .check_not(list(ops_after_lowering))
             .to_executorch()
@@ -74,18 +66,13 @@ def _test_qs8_mv3(self):
 
     # TODO: Delete and only used calibrated test after T187799178
     def test_qs8_mv3_no_calibration(self):
-        ops_after_quantization = self.all_operators - {
-            "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
-        }
         ops_after_lowering = self.all_operators
 
         (
             Tester(self.mv3, self.model_inputs, dynamic_shapes=self.dynamic_shapes)
             .quantize(Quantize(calibrate=False))
             .export()
-            .to_edge()
-            .check(list(ops_after_quantization))
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .check_not(list(ops_after_lowering))
             .to_executorch()
diff --git a/backends/xnnpack/test/models/resnet.py b/backends/xnnpack/test/models/resnet.py
@@ -39,8 +39,7 @@ def forward(self, x):
     def _test_exported_resnet(self, tester):
         (
             tester.export()
-            .to_edge()
-            .partition()
+            .to_edge_transform_and_lower()
             .check_not(
                 [
                     "executorch_exir_dialects_edge__ops_aten_convolution_default",
diff --git a/backends/xnnpack/test/models/torchvision_vit.py b/backends/xnnpack/test/models/torchvision_vit.py
@@ -73,9 +73,7 @@ def _test_exported_vit(self, tester, check_nots=None):
         }
         (
             tester.export()
-            .to_edge()
-            .check(list(self.all_operators))
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .check_not(list(lowerable_xnn_operators))
             .check_not(check_nots)
diff --git a/backends/xnnpack/test/models/very_big_model.py b/backends/xnnpack/test/models/very_big_model.py
@@ -34,8 +34,7 @@ def _test_very_big_model(self):
         (
             Tester(self.BigModel(), (torch.randn(1, 5000),))
             .export()
-            .to_edge()
-            .partition()
+            .to_edge_transform_and_lower()
             .check(["torch.ops.higher_order.executorch_call_delegate"])
             .to_executorch()
             .serialize()
diff --git a/backends/xnnpack/test/models/w2l.py b/backends/xnnpack/test/models/w2l.py
@@ -25,8 +25,7 @@ def test_fp32_w2l(self):
         (
             Tester(self.wav2letter, self.model_inputs, self.dynamic_shape)
             .export()
-            .to_edge()
-            .partition()
+            .to_edge_transform_and_lower()
             .check_not(
                 [
                     "executorch_exir_dialectes_edge__ops_aten_convolution_default",
@@ -44,8 +43,7 @@ def test_qs8_w2l(self):
             Tester(self.wav2letter.eval(), self.model_inputs, self.dynamic_shape)
             .quantize()
             .export()
-            .to_edge()
-            .partition()
+            .to_edge_transform_and_lower()
             .check_not(
                 [
                     "executorch_exir_dialectes_edge__ops_aten_convolution_default",

Original file line number	Diff line number	Diff line change
`@@ -101,13 +101,16 @@`
`101`	`101`	`exir_ops.edge.aten.addmm.default, # TODO(T163877189) add constraint for addmm`
`102`	`102`	`]`
`103`	`103`
	`104`	`+# Names of the ops that count as supported quantized ops. Membership in this`
	`105`	`+# set is used to decide whether a quantize/dequantize op is implicit or explicit.`
`104`	`106`	`SUPPORTED_IMPLICIT_Q_DQ_OP_NAMES_SET = {`
`105`	`107`	`op.name()`
`106`	`108`	`for op in (`
`107`	`109`	`SUPPORTED_QUANT_OPS`
`108`	`110`	`+ [`
`109`	`111`	`exir_ops.edge.aten._to_copy.default,`
`110`	`112`	`exir_ops.edge.aten.linear.default,`
	`113`	`+ exir_ops.edge.aten.convolution.default,`
`111`	`114`	`]`
`112`	`115`	`)`
`113`	`116`	`}`
Original file line number	Diff line number	Diff line change
`@@ -39,8 +39,7 @@ def forward(self, x):`
`39`	`39`	`def _test_exported_resnet(self, tester):`
`40`	`40`	`(`
`41`	`41`	`tester.export()`
`42`		`- .to_edge()`
`43`		`- .partition()`
	`42`	`+ .to_edge_transform_and_lower()`
`44`	`43`	`.check_not(`
`45`	`44`	`[`
`46`	`45`	`"executorch_exir_dialects_edge__ops_aten_convolution_default",`
Original file line number	Diff line number	Diff line change
`@@ -73,9 +73,7 @@ def _test_exported_vit(self, tester, check_nots=None):`
`73`	`73`	`}`
`74`	`74`	`(`
`75`	`75`	`tester.export()`
`76`		`- .to_edge()`
`77`		`- .check(list(self.all_operators))`
`78`		`- .partition()`
	`76`	`+ .to_edge_transform_and_lower()`
`79`	`77`	`.check(["torch.ops.higher_order.executorch_call_delegate"])`
`80`	`78`	`.check_not(list(lowerable_xnn_operators))`
`81`	`79`	`.check_not(check_nots)`
Original file line number	Diff line number	Diff line change
`@@ -25,8 +25,7 @@ def test_fp32_w2l(self):`
`25`	`25`	`(`
`26`	`26`	`Tester(self.wav2letter, self.model_inputs, self.dynamic_shape)`
`27`	`27`	`.export()`
`28`		`- .to_edge()`
`29`		`- .partition()`
	`28`	`+ .to_edge_transform_and_lower()`
`30`	`29`	`.check_not(`
`31`	`30`	`[`
`32`	`31`	`"executorch_exir_dialectes_edge__ops_aten_convolution_default",`
`@@ -44,8 +43,7 @@ def test_qs8_w2l(self):`
`44`	`43`	`Tester(self.wav2letter.eval(), self.model_inputs, self.dynamic_shape)`
`45`	`44`	`.quantize()`
`46`	`45`	`.export()`
`47`		`- .to_edge()`
`48`		`- .partition()`
	`46`	`+ .to_edge_transform_and_lower()`
`49`	`47`	`.check_not(`
`50`	`48`	`[`
`51`	`49`	`"executorch_exir_dialectes_edge__ops_aten_convolution_default",`