Use single rounding as default for TOSA lowering

SaoirseARM · web-flow · commit dbd40f443814 · 2024-08-13T19:17:06.000-07:00
Differential Revision: D61240443
Pull Request resolved: #4591
diff --git a/backends/arm/operators/op_addmm.py b/backends/arm/operators/op_addmm.py
@@ -12,10 +12,7 @@
     register_node_visitor,
 )
 from executorch.backends.arm.tosa_mapping import TosaArg
-from executorch.backends.arm.tosa_quant_utils import (
-    compute_multiplier_and_shift,
-    get_quant_node_args,
-)
+from executorch.backends.arm.tosa_quant_utils import build_rescale, get_quant_node_args
 
 from executorch.backends.arm.tosa_utils import build_reshape
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -128,32 +125,20 @@ def define_node(
             weight_scale = get_quant_node_args(weight_node_q_node).scale
 
             output_rescale_scale = (input_scale * weight_scale) / consumer_node_scale
-            (
-                multiplier_output,
-                shift_output,
-            ) = compute_multiplier_and_shift(output_rescale_scale)
-
-            attr_rescale_output = ts.TosaSerializerAttribute()
-            attr_rescale_output.RescaleAttribute(
-                input_zp=0,
-                output_zp=consumer_node_node_zp,
-                multiplier=[multiplier_output],
-                shift=[shift_output],
-                scale32=True,
-                double_round=True,
-                per_channel=False,
-                input_unsigned=False,
-                output_unsigned=False,
-            )
 
             reshaped_res = tosa_graph.addIntermediate(result_shape, ts.DType.INT32)
             build_reshape(tosa_graph, conv2d_res.name, result_shape, reshaped_res.name)
 
-            tosa_graph.addOperator(
-                TosaOp.Op().RESCALE,
-                [reshaped_res.name],
-                [output.name],
-                attr_rescale_output,
+            build_rescale(
+                tosa_fb=tosa_graph,
+                scale=output_rescale_scale,
+                input_node=reshaped_res,
+                output_name=output.name,
+                output_type=ts.DType.INT8,
+                output_shape=reshaped_res.shape,
+                input_zp=0,
+                output_zp=consumer_node_node_zp,
+                is_double_round=False,
             )
 
         else:
diff --git a/backends/arm/tosa_quant_utils.py b/backends/arm/tosa_quant_utils.py
@@ -171,7 +171,7 @@ def build_rescale(
     output_shape,
     input_zp,
     output_zp,
-    is_double_round,
+    is_double_round=False,
 ):
     scale_width = 32 if is_scale32(output_type) else 16
     multiplier, shift = compute_multiplier_and_shift(scale, scale_width)
@@ -197,7 +197,7 @@ def build_rescale(
 
 
 def build_rescale_to_int32(
-    tosa_fb, input, input_zp, rescale_scale, is_scale32=True, is_double_round=True
+    tosa_fb, input, input_zp, rescale_scale, is_scale32=True, is_double_round=False
 ) -> TosaSerializerTensor:
     multiplier, shift = compute_multiplier_and_shift(rescale_scale)
     attr_rescale = ts.TosaSerializerAttribute()
@@ -230,7 +230,7 @@ def build_rescale_from_int32(
     output_zp,
     rescale_scale,
     is_scale32=True,
-    is_double_round=True,
+    is_double_round=False,
 ) -> TosaSerializerTensor:
     multiplier, shift = compute_multiplier_and_shift(rescale_scale)
     attr_rescale_output = ts.TosaSerializerAttribute()
@@ -329,9 +329,6 @@ def build_rescale_conv_output(
     output_scale,
     output_zp,
 ):
-    # Only use double round if we are doing 32 bit scaling
-    double_round = is_scale32(output_type)
-
     # TODO add check to verify if this is a Per-channel quantization.
     post_conv2d_scale = (input_scale.number * weight_scale.number) / output_scale.number
 
@@ -345,6 +342,5 @@ def build_rescale_conv_output(
         op.shape,
         0,
         output_zp.number,
-        double_round,
     )
     return