[utils] Add script to generate elaborated IR and assembly tests (#89026)

MaskRay · web-flow · commit aacea0d0f674 · 2024-05-08T23:58:55.000-07:00
Generally, IR and assembly test files benefit from being cleaned to remove unnecessary details. However, for tests requiring elaborate IR or assembly files where cleanup is less practical (e.g., large amount of debug information output from Clang), the current practice is to include the C/C++ source file and the generation instructions as comments. This is inconvenient when regeneration is needed. This patch adds `llvm/utils/update_test_body.py` to allow easier regeneration. `ld.lld --debug-names` tests (#86508) utilize this script for Clang-generated assembly tests. Note: `-o pipefail` is standard (since https://www.austingroupbugs.net/view.php?id=789) but not supported by dash. Link: https://discourse.llvm.org/t/utility-to-generate-elaborated-assembly-ir-tests/78408
diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
@@ -433,6 +433,87 @@ actually participate in the test besides holding the ``RUN:`` lines.
   putting the extra files in an ``Inputs/`` directory. This pattern is
   deprecated.
 
+Elaborated tests
+----------------
+
+Generally, IR and assembly test files benefit from being cleaned to remove
+unnecessary details. However, for tests requiring elaborate IR or assembly
+files where cleanup is less practical (e.g., large amount of debug information
+output from Clang), you can include generation instructions within
+``split-file`` part called ``gen``. Then, run
+``llvm/utils/update_test_body.py`` on the test file to generate the needed
+content.
+
+.. code-block:: none
+
+    ; RUN: rm -rf %t && split-file %s %t && cd %t
+    ; RUN: opt -S a.ll ... | FileCheck %s
+
+    ; CHECK: hello
+
+    ;--- a.cc
+    int va;
+    ;--- gen
+    clang --target=x86_64-linux -S -emit-llvm -g a.cc -o -
+
+    ;--- a.ll
+    # content generated by the script 'gen'
+
+.. code-block:: bash
+
+   PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.ll
+
+The script will prepare extra files with ``split-file``, invoke ``gen``, and
+then rewrite the part after ``gen`` with its stdout.
+
+For convenience, if the test needs one single assembly file, you can also wrap
+``gen`` and its required files with ``.ifdef`` and ``.endif``. Then you can
+skip ``split-file`` in RUN lines.
+
+.. code-block:: none
+
+    # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o a.o
+    # RUN: ... | FileCheck %s
+
+    # CHECK: hello
+
+    .ifdef GEN
+    #--- a.cc
+    int va;
+    #--- gen
+    clang --target=x86_64-linux -S -g a.cc -o -
+    .endif
+    # content generated by the script 'gen'
+
+.. note::
+
+  Consider specifying an explicit target triple to avoid differences when
+  regeneration is needed on another machine.
+
+  ``gen`` is invoked with ``PWD`` set to ``/proc/self/cwd``. Clang commands
+  don't need ``-fdebug-compilation-dir=`` since its default value is ``PWD``.
+
+  Check prefixes should be placed before ``.endif`` since the part after
+  ``.endif`` is replaced.
+
+If the test body contains multiple files, you can print ``---`` separators and
+utilize ``split-file`` in ``RUN`` lines.
+
+.. code-block:: none
+
+    # RUN: rm -rf %t && split-file %s %t && cd %t
+    ...
+
+    #--- a.cc
+    int va;
+    #--- b.cc
+    int vb;
+    #--- gen
+    clang --target=x86_64-linux -S -O1 -g a.cc -o -
+    echo '#--- b.s'
+    clang --target=x86_64-linux -S -O1 -g b.cc -o -
+    #--- a.s
+
 Fragile tests
 -------------
 
diff --git a/llvm/test/tools/UpdateTestChecks/lit.local.cfg b/llvm/test/tools/UpdateTestChecks/lit.local.cfg
@@ -19,7 +19,8 @@ def add_update_script_substition(
     # Specify an explicit default version in UTC tests, so that the --version
     # embedded in UTC_ARGS does not change in all test expectations every time
     # the default is bumped.
-    extra_args += " --version=1"
+    if name != "%update_test_body":
+        extra_args += " --version=1"
     config.substitutions.append(
         (name, "'%s' %s %s" % (python_exe, script_path, extra_args))
     )
@@ -47,3 +48,7 @@ if os.path.isfile(llvm_mca_path):
     config.available_features.add("llvm-mca-binary")
     mca_arg = "--llvm-mca-binary " + shell_quote(llvm_mca_path)
     add_update_script_substition("%update_test_checks", extra_args=mca_arg)
+
+split_file_path = os.path.join(config.llvm_tools_dir, "split-file")
+if os.path.isfile(split_file_path):
+    add_update_script_substition("%update_test_body")
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic-asm.test.expected b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic-asm.test.expected
@@ -0,0 +1,13 @@
+# RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+# RUN: diff -u %S/Inputs/basic-asm.test.expected %t
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+cat a.txt b.txt
+.endif
+.long 0
+.long 1
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected
@@ -0,0 +1,16 @@
+; RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+; RUN: diff -u %S/Inputs/basic.test.expected %t
+
+;--- a.txt
+@a = global i32 0
+;--- b.txt
+@b = global i32 0
+;--- gen
+cat a.txt
+echo ';--- b.ll'
+cat b.txt
+
+;--- a.ll
+@a = global i32 0
+;--- b.ll
+@b = global i32 0
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/basic-asm.test b/llvm/test/tools/UpdateTestChecks/update_test_body/basic-asm.test
@@ -0,0 +1,11 @@
+# RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+# RUN: diff -u %S/Inputs/basic-asm.test.expected %t
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+cat a.txt b.txt
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test b/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test
@@ -0,0 +1,13 @@
+; RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+; RUN: diff -u %S/Inputs/basic.test.expected %t
+
+;--- a.txt
+@a = global i32 0
+;--- b.txt
+@b = global i32 0
+;--- gen
+cat a.txt
+echo ';--- b.ll'
+cat b.txt
+
+;--- a.ll
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test b/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test
@@ -0,0 +1,13 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+# RUN: diff -u %t %s
+
+# CHECK: stdout is empty; forgot -o - ?
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+true
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test
@@ -0,0 +1,7 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK: 'gen' does not exist
+
+.ifdef GEN
+#--- a.txt
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test
@@ -0,0 +1,11 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK:      log
+# CHECK-NEXT: 'gen' failed
+
+.ifdef GEN
+#--- gen
+echo log >&2
+false  # gen fails due to sh -e
+true
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-unterminated.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-unterminated.test
@@ -0,0 +1,8 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK: 'gen' should be followed by another part (---) or .endif
+
+#--- a.txt
+.long 0
+#--- gen
+cat a.txt
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg b/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg
@@ -0,0 +1,4 @@
+import platform
+
+if platform.system() == "Windows":
+    config.unsupported = True
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s b/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
@@ -1,22 +1,24 @@
-# Source:
-#   struct e {
-#     enum {} f[16384];
-#     short g;
-#   };
-#   e foo() {
-#     auto E = new e;
-#     return *E;
-#   }
-# Compile with:
-#   clang -O2 -gdwarf-4 -S a.cpp -o a4.s
-
 # RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o %t.o
 # RUN: llvm-dwarfdump -debug-info -name g %t.o | FileCheck %s
 
 # CHECK: DW_TAG_member
 # CHECK: DW_AT_name ("g")
 # CHECK: DW_AT_data_member_location    (0x4000)
 
+.ifdef GEN
+#--- a.cpp
+struct e {
+  enum {} f[16384];
+  short g;
+};
+e foo() {
+  auto E = new e;
+  return *E;
+}
+#--- gen
+clang --target=x86_64-apple-macosx -O2 -gdwarf-4 -S a.cpp -o -
+.endif
+
 	.section	__TEXT,__text,regular,pure_instructions
 	.macosx_version_min 10, 14
 	.globl	__Z3foov                ## -- Begin function _Z3foov
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s
@@ -1,16 +1,16 @@
 # RUN: llvm-mc < %s -filetype obj -triple x86_64 -o - \
 # RUN:   | llvm-dwarfdump - | FileCheck %s
 
-# Generated from:
-#
-#   struct t1 { };
-#   t1 v1;
-#
-# $ clang++ -S -g -fdebug-types-section -gsplit-dwarf -o test.5.split.s -gdwarf-5 -g
-
 # CHECK: DW_TAG_variable
 # CHECK:   DW_AT_type ({{.*}} "t1")
 
+.ifdef GEN
+#--- test.cpp
+struct t1 { };
+t1 v1;
+#--- gen
+clang++ --target=x86_64-linux -S -g -fdebug-types-section -gsplit-dwarf -gdwarf-5 test.cpp -o -
+.endif
 	.text
 	.file	"test.cpp"
 	.section	.debug_types.dwo,"e",@progbits
diff --git a/llvm/utils/update_test_body.py b/llvm/utils/update_test_body.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+"""Generate test body using split-file and a custom script.
+
+The script will prepare extra files with `split-file`, invoke `gen`, and then
+rewrite the part after `gen` with its stdout.
+
+https://llvm.org/docs/TestingGuide.html#elaborated-tests
+
+Example:
+PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.s
+"""
+import argparse
+import contextlib
+import os
+import re
+import subprocess
+import sys
+import tempfile
+
+
+@contextlib.contextmanager
+def cd(directory):
+    cwd = os.getcwd()
+    os.chdir(directory)
+    try:
+        yield
+    finally:
+        os.chdir(cwd)
+
+
+def process(args, path):
+    prolog = []
+    seen_gen = False
+    with open(path) as f:
+        for line in f.readlines():
+            line = line.rstrip()
+            prolog.append(line)
+            if (seen_gen and re.match(r"(.|//)---", line)) or line.startswith(".endif"):
+                break
+            if re.match(r"(.|//)--- gen", line):
+                seen_gen = True
+        else:
+            print(
+                "'gen' should be followed by another part (---) or .endif",
+                file=sys.stderr,
+            )
+            return 1
+
+    if not seen_gen:
+        print("'gen' does not exist", file=sys.stderr)
+        return 1
+    with tempfile.TemporaryDirectory(prefix="update_test_body_") as dir:
+        try:
+            # If the last line starts with ".endif", remove it.
+            sub = subprocess.run(
+                ["split-file", "-", dir],
+                input="\n".join(
+                    prolog[:-1] if prolog[-1].startswith(".endif") else prolog
+                ).encode(),
+                capture_output=True,
+                check=True,
+            )
+        except subprocess.CalledProcessError as ex:
+            sys.stderr.write(ex.stderr.decode())
+            return 1
+        with cd(dir):
+            if args.shell:
+                print(f"invoke shell in the temporary directory '{dir}'")
+                subprocess.run([os.environ.get("SHELL", "sh")])
+                return 0
+
+            sub = subprocess.run(
+                ["sh", "-eu", "gen"],
+                capture_output=True,
+                # Don't encode the directory information to the Clang output.
+                # Remove unneeded details (.ident) as well.
+                env=dict(
+                    os.environ,
+                    CCC_OVERRIDE_OPTIONS="#^-fno-ident",
+                    PWD="/proc/self/cwd",
+                ),
+            )
+            sys.stderr.write(sub.stderr.decode())
+            if sub.returncode != 0:
+                print("'gen' failed", file=sys.stderr)
+                return sub.returncode
+            if not sub.stdout:
+                print("stdout is empty; forgot -o - ?", file=sys.stderr)
+                return 1
+            content = sub.stdout.decode()
+
+    with open(path, "w") as f:
+        # Print lines up to '.endif'.
+        print("\n".join(prolog), file=f)
+        # Then print the stdout of 'gen'.
+        f.write(content)
+
+
+parser = argparse.ArgumentParser(
+    description="Generate test body using split-file and a custom script"
+)
+parser.add_argument("files", nargs="+")
+parser.add_argument(
+    "--shell", action="store_true", help="invoke shell instead of 'gen'"
+)
+args = parser.parse_args()
+for path in args.files:
+    retcode = process(args, path)
+    if retcode != 0:
+        sys.exit(retcode)