Arm backend: Add TOSA VGF encapsulated compilation target. (#10476)

robell · web-flow · commit 66314e40e576 · 2025-04-29T13:09:52.000+02:00
Add TOSA VGF encapsulated compilation target.

This change adds support for "vgf" files, which wrap TOSA output and
include memory planning, for target devices that can JIT-compile TOSA to
the target ISA on-device.

 - Add a VgfQuantizer (same as TOSAQuantizer)
 - Add a VgfBackend and VgfPartitioner to produce TOSA wrapped in a VGF
 - Requires the yet-to-be-released converter_backend tool

### Test plan
As this is a new encapsulation with a tool that's not yet released,
integration with unit tests will come in a subsequent commit.

Signed-off-by: Rob Elliott <robert.elliott@arm.com>
diff --git a/backends/arm/TARGETS b/backends/arm/TARGETS
@@ -7,6 +7,8 @@ python_library(
         "ethosu_partitioner.py",
         "tosa_backend.py",
         "tosa_partitioner.py",
+        "vgf_backend.py",
+        "vgf_partitioner.py",
     ],
     deps = [
         ":arm_backend",
diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py
@@ -25,21 +25,37 @@ def __init__(self):
         self.output_format = None
         self.path_for_intermediates = None
         self.tosa_spec = None
-        self.input_order = None
+
+    def vgf_compile_spec(
+        self,
+        compiler_flags: Optional[str] = "",
+    ) -> "ArmCompileSpecBuilder":
+        """
+        Generate compile spec for VGF compatible targets
+
+        Args:
+            compiler_flags: Extra compiler flags for converter_backend
+        """
+        self.output_format = "vgf"
+        self.compiler_flags = [
+            compiler_flags,
+        ]
+        self.tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+MI")
+        return self
 
     def ethosu_compile_spec(
         self,
-        config: str,
-        system_config: str,
-        memory_mode: str,
+        target: str,
+        system_config: Optional[str] = None,
+        memory_mode: Optional[str] = None,
         extra_flags: Optional[str] = None,
         config_ini: Optional[str] = "Arm/vela.ini",
     ) -> "ArmCompileSpecBuilder":
         """
         Generate compile spec for Ethos-U NPU
 
         Args:
-            config: Ethos-U accelerator configuration, e.g. ethos-u55-128
+            target: Ethos-U accelerator configuration, e.g. ethos-u55-128
             system_config: System configuration to select from the Vel
                 configuration file
             memory_mode: Memory mode to select from the Vela configuration file
@@ -52,18 +68,38 @@ def ethosu_compile_spec(
         ), f"Output format already set to f{self.output_format}"
         self.output_format = "vela"
         self.compiler_flags = [
-            f"--accelerator-config={config}",
+            f"--accelerator-config={target}",
             f"--config={config_ini}",
         ]
+
+        # default system config and memory mode
+        if "ethos-u55" in target:
+            if system_config is None:
+                system_config = "Ethos_U55_High_End_Embedded"
+            if memory_mode is None:
+                memory_mode = "Shared_Sram"
+        elif "ethos-u85" in target:
+            if system_config is None:
+                system_config = "Ethos_U85_SYS_DRAM_Mid"
+            if memory_mode is None:
+                memory_mode = "Sram_Only"
+        else:
+            raise RuntimeError(f"Unknown ethos target: {target}")
+
         if system_config is not None:
             self.compiler_flags.append(f"--system-config={system_config}")
         if memory_mode is not None:
             self.compiler_flags.append(f"--memory-mode={memory_mode}")
         if extra_flags is not None:
             self.compiler_flags.append(extra_flags)
 
+        # We require raw output and regor, so add these flags if absent. This
+        # overrides any other output setting.
+        self.compiler_flags.append("--output-format=raw")
+        self.compiler_flags.append("--debug-force-regor")
+
         base_tosa_version = "TOSA-0.80+BI"
-        if "u55" in config:
+        if "u55" in target:
             # Add the Ethos-U55 extension marker
             base_tosa_version += "+u55"
         self.tosa_spec = TosaSpecification.create_from_string(base_tosa_version)
@@ -106,26 +142,22 @@ def build(self) -> List[CompileSpec]:
         # Always supply a TOSA version
         self.compile_spec = [CompileSpec("tosa_spec", str(self.tosa_spec).encode())]
 
-        if self.output_format == "vela":
-            self.compile_spec += [
-                CompileSpec("output_format", "vela".encode()),
-                CompileSpec("compile_flags", " ".join(self.compiler_flags).encode()),
-            ]
-        elif self.output_format == "tosa":
-            self.compile_spec.append(CompileSpec("output_format", "tosa".encode()))
+        # Add compile flags, these are backend specific, refer to the backend
+        # documentation.
+        self.compile_spec += [
+            CompileSpec("compile_flags", " ".join(self.compiler_flags).encode()),
+        ]
+
+        # encode output format
+        self.compile_spec.append(
+            CompileSpec("output_format", self.output_format.encode())
+        )
 
         if self.path_for_intermediates is not None:
             self.compile_spec.append(
                 CompileSpec("debug_artifact_path", self.path_for_intermediates.encode())
             )
 
-        if self.input_order:
-            self.compile_spec.append(
-                CompileSpec(
-                    "input_order", " ".join(map(str, self.input_order)).encode()
-                )
-            )
-
         return self.compile_spec
 
 
@@ -148,6 +180,13 @@ def is_ethosu(compile_spec: List[CompileSpec]) -> bool:
     return False
 
 
+def is_vgf(compile_spec: List[CompileSpec]) -> bool:
+    for spec in compile_spec:
+        if spec.key == "output_format":
+            return spec.value.decode() == "vgf"
+    return False
+
+
 def get_tosa_spec(compile_spec: List[CompileSpec]) -> TosaSpecification:
     for spec in compile_spec:
         if spec.key == "tosa_spec":
diff --git a/backends/arm/arm_vela.py b/backends/arm/arm_vela.py
@@ -23,12 +23,11 @@
 
 # Pack either input or output tensor block, compose the related arrays into
 # per-io structs to simplify runtime use.
-def vela_bin_pack_io(prefix, data, shape_order=None):
+def vela_bin_pack_io(prefix, data):
     vela_input_shapes = data[prefix + "_shape"]
 
-    order = shape_order if shape_order else range(len(vela_input_shapes))
     ios = struct.pack("<i", len(vela_input_shapes))
-    for i in order:
+    for i in range(len(vela_input_shapes)):
         io_shape = vela_input_shapes[i]
         io_elem_size = data[prefix + "_elem_size"][i]
         io_offset = data[prefix + "_offset"][i]
@@ -45,9 +44,7 @@ def vela_bin_pack_io(prefix, data, shape_order=None):
 # Output via Vela to binary stream for ArmBackendEthosU
 # WARNING: Do not change this without changing VelaBinStream.cpp as that
 #          function consumes this format and the two need to align.
-def vela_compile(
-    tosa_flatbuffer: bytes, args: List[str], shape_order=None, verbose: bool = False
-):
+def vela_compile(tosa_flatbuffer: bytes, args: List[str], verbose: bool = False):
     """
     Compile a TOSA graph to a binary stream for ArmBackendEthosU using Vela.
     """
@@ -98,7 +95,7 @@ def vela_compile(
             bin_blocks["scratch_data"] = b"\x00" * block_length
 
             # Capture inputs and outputs
-            bin_blocks["inputs"] = vela_bin_pack_io("input", data, shape_order)
+            bin_blocks["inputs"] = vela_bin_pack_io("input", data)
             bin_blocks["outputs"] = vela_bin_pack_io("output", data)
 
             bin_blocks["vela_end_stream"] = b""
diff --git a/backends/arm/ethosu_backend.py b/backends/arm/ethosu_backend.py
@@ -35,19 +35,16 @@ class EthosUBackend(BackendDetails):
 
     @staticmethod
     def _compile_tosa_flatbuffer(
-        tosa_flatbuffer: bytes, compile_spec: list[CompileSpec]
+        tosa_flatbuffer: bytes, compile_spec: List[CompileSpec]
     ) -> bytes:
         """
         Static helper method to do the compilation of the TOSA flatbuffer
         representation to a target specific binary stream.
         """
         compile_flags = []
-        input_order = []
         for spec in compile_spec:
             if spec.key == "compile_flags":
                 compile_flags.append(spec.value.decode())
-            if spec.key == "input_order":
-                input_order = list(map(int, spec.value.decode().split(",")))
 
         if len(compile_flags) == 0:
             # Not testing for compile_flags correctness here, just that they are
@@ -60,7 +57,6 @@ def _compile_tosa_flatbuffer(
         binary = vela_compile(
             tosa_flatbuffer,
             compile_flags,
-            input_order,
             verbose=logger.getEffectiveLevel() == logging.INFO,
         )
         return binary
diff --git a/backends/arm/quantizer/__init__.py b/backends/arm/quantizer/__init__.py
@@ -9,6 +9,7 @@
     EthosUQuantizer,
     get_symmetric_quantization_config,
     TOSAQuantizer,
+    VgfQuantizer,
 )
 
 # Used in tests
diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
@@ -27,6 +27,7 @@
 from executorch.backends.arm.arm_backend import (
     get_tosa_spec,
     is_ethosu,
+    is_vgf,
 )  # usort: skip
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from torch.ao.quantization.fake_quantize import (
@@ -52,6 +53,7 @@
 __all__ = [
     "TOSAQuantizer",
     "EthosUQuantizer",
+    "VgfQuantizer",
     "get_symmetric_quantization_config",
 ]
 
@@ -358,3 +360,12 @@ def __init__(self, compile_spec: list[CompileSpec]) -> None:
 
         tosa_spec = get_tosa_spec(compile_spec)
         super().__init__(tosa_spec)
+
+
+class VgfQuantizer(TOSAQuantizer):
+    def __init__(self, compile_spec: list[CompileSpec]) -> None:
+        if not is_vgf(compile_spec):
+            raise RuntimeError("compile spec is not targeting VGF")
+
+        tosa_spec = get_tosa_spec(compile_spec)
+        super().__init__(tosa_spec)
diff --git a/backends/arm/tosa_backend.py b/backends/arm/tosa_backend.py
@@ -35,15 +35,15 @@
 logger = logging.getLogger(__name__)
 
 
-def _get_first_delegation_tag(graph_module) -> str | None:
-    """Get the first delegation tag from the graph_module or return None."""
+def arm_get_first_delegation_tag(graph_module) -> str:
+    """Get the first delegation tag from the graph_module or return empty string."""
     for node in graph_module.graph.nodes:
         tag = node.meta.get("delegation_tag")
         if tag:
             return tag
 
     logger.debug("No delegation tag found in partition.")
-    return None
+    return ""
 
 
 @final
@@ -63,16 +63,13 @@ def preprocess(  # noqa: C901
         artifact_path = None
         output_format = ""
         compile_flags = []
-        input_order = []
         for spec in compile_spec:
             if spec.key == "debug_artifact_path":
                 artifact_path = spec.value.decode()
             if spec.key == "output_format":
                 output_format = spec.value.decode()
             if spec.key == "compile_flags":
                 compile_flags.append(spec.value.decode())
-            if spec.key == "input_order":
-                input_order = list(map(int, spec.value.decode().split(",")))
 
         # Check that the output format is set correctly in the compile spec
         if output_format != "tosa":
@@ -129,14 +126,8 @@ def preprocess(  # noqa: C901
                 dbg_fail(node, graph_module, tosa_graph, artifact_path)
                 raise
 
-        if len(input_order) > 0:
-            if input_count != len(input_order):
-                raise RuntimeError(
-                    "The rank of the input order is not equal to amount of input tensors"
-                )
-
         if artifact_path:
-            tag = _get_first_delegation_tag(graph_module)
+            tag = arm_get_first_delegation_tag(graph_module)
             dbg_tosa_dump(
                 tosa_graph,
                 artifact_path,
diff --git a/backends/arm/vgf_backend.py b/backends/arm/vgf_backend.py
diff --git a/backends/arm/vgf_partitioner.py b/backends/arm/vgf_partitioner.py
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py

Original file line number	Diff line number	Diff line change
`@@ -9,6 +9,7 @@`
`9`	`9`	`EthosUQuantizer,`
`10`	`10`	`get_symmetric_quantization_config,`
`11`	`11`	`TOSAQuantizer,`
	`12`	`+ VgfQuantizer,`
`12`	`13`	`)`
`13`	`14`
`14`	`15`	`# Used in tests`