Skip to content

Commit aacea0d

Browse files
authored
[utils] Add script to generate elaborated IR and assembly tests (#89026)
Generally, IR and assembly test files benefit from being cleaned to remove unnecessary details. However, for tests requiring elaborate IR or assembly files where cleanup is less practical (e.g., large amount of debug information output from Clang), the current practice is to include the C/C++ source file and the generation instructions as comments. This is inconvenient when regeneration is needed. This patch adds `llvm/utils/update_test_body.py` to allow easier regeneration. `ld.lld --debug-names` tests (#86508) utilize this script for Clang-generated assembly tests. Note: `-o pipefail` is standard (since https://www.austingroupbugs.net/view.php?id=789) but not supported by dash. Link: https://discourse.llvm.org/t/utility-to-generate-elaborated-assembly-ir-tests/78408
1 parent dec8055 commit aacea0d

14 files changed

+314
-20
lines changed

llvm/docs/TestingGuide.rst

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,87 @@ actually participate in the test besides holding the ``RUN:`` lines.
433433
putting the extra files in an ``Inputs/`` directory. This pattern is
434434
deprecated.
435435

436+
Elaborated tests
437+
----------------
438+
439+
Generally, IR and assembly test files benefit from being cleaned to remove
440+
unnecessary details. However, for tests requiring elaborate IR or assembly
441+
files where cleanup is less practical (e.g., large amount of debug information
442+
output from Clang), you can include generation instructions within
443+
a ``split-file`` part called ``gen``. Then, run
444+
``llvm/utils/update_test_body.py`` on the test file to generate the needed
445+
content.
446+
447+
.. code-block:: none
448+
449+
; RUN: rm -rf %t && split-file %s %t && cd %t
450+
; RUN: opt -S a.ll ... | FileCheck %s
451+
452+
; CHECK: hello
453+
454+
;--- a.cc
455+
int va;
456+
;--- gen
457+
clang --target=x86_64-linux -S -emit-llvm -g a.cc -o -
458+
459+
;--- a.ll
460+
# content generated by the script 'gen'
461+
462+
.. code-block:: bash
463+
464+
PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.ll
465+
466+
The script will prepare extra files with ``split-file``, invoke ``gen``, and
467+
then rewrite the part after ``gen`` with its stdout.
468+
469+
For convenience, if the test needs only a single assembly file, you can also wrap
470+
``gen`` and its required files with ``.ifdef`` and ``.endif``. Then you can
471+
skip ``split-file`` in RUN lines.
472+
473+
.. code-block:: none
474+
475+
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o a.o
476+
# RUN: ... | FileCheck %s
477+
478+
# CHECK: hello
479+
480+
.ifdef GEN
481+
#--- a.cc
482+
int va;
483+
#--- gen
484+
clang --target=x86_64-linux -S -g a.cc -o -
485+
.endif
486+
# content generated by the script 'gen'
487+
488+
.. note::
489+
490+
Consider specifying an explicit target triple to avoid differences when
491+
regeneration is needed on another machine.
492+
493+
``gen`` is invoked with ``PWD`` set to ``/proc/self/cwd``. Clang commands
494+
don't need ``-fdebug-compilation-dir=`` since its default value is ``PWD``.
495+
496+
Check prefixes should be placed before ``.endif`` since the part after
497+
``.endif`` is replaced.
498+
499+
If the test body contains multiple files, you can print ``---`` separators and
500+
utilize ``split-file`` in ``RUN`` lines.
501+
502+
.. code-block:: none
503+
504+
# RUN: rm -rf %t && split-file %s %t && cd %t
505+
...
506+
507+
#--- a.cc
508+
int va;
509+
#--- b.cc
510+
int vb;
511+
#--- gen
512+
clang --target=x86_64-linux -S -O1 -g a.cc -o -
513+
echo '#--- b.s'
514+
clang --target=x86_64-linux -S -O1 -g b.cc -o -
515+
#--- a.s
516+
436517
Fragile tests
437518
-------------
438519

llvm/test/tools/UpdateTestChecks/lit.local.cfg

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ def add_update_script_substition(
1919
# Specify an explicit default version in UTC tests, so that the --version
2020
# embedded in UTC_ARGS does not change in all test expectations every time
2121
# the default is bumped.
22-
extra_args += " --version=1"
22+
if name != "%update_test_body":
23+
extra_args += " --version=1"
2324
config.substitutions.append(
2425
(name, "'%s' %s %s" % (python_exe, script_path, extra_args))
2526
)
@@ -47,3 +48,7 @@ if os.path.isfile(llvm_mca_path):
4748
config.available_features.add("llvm-mca-binary")
4849
mca_arg = "--llvm-mca-binary " + shell_quote(llvm_mca_path)
4950
add_update_script_substition("%update_test_checks", extra_args=mca_arg)
51+
52+
split_file_path = os.path.join(config.llvm_tools_dir, "split-file")
53+
if os.path.isfile(split_file_path):
54+
add_update_script_substition("%update_test_body")
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
2+
# RUN: diff -u %S/Inputs/basic-asm.test.expected %t
3+
4+
.ifdef GEN
5+
#--- a.txt
6+
.long 0
7+
#--- b.txt
8+
.long 1
9+
#--- gen
10+
cat a.txt b.txt
11+
.endif
12+
.long 0
13+
.long 1
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
; RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
2+
; RUN: diff -u %S/Inputs/basic.test.expected %t
3+
4+
;--- a.txt
5+
@a = global i32 0
6+
;--- b.txt
7+
@b = global i32 0
8+
;--- gen
9+
cat a.txt
10+
echo ';--- b.ll'
11+
cat b.txt
12+
13+
;--- a.ll
14+
@a = global i32 0
15+
;--- b.ll
16+
@b = global i32 0
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
2+
# RUN: diff -u %S/Inputs/basic-asm.test.expected %t
3+
4+
.ifdef GEN
5+
#--- a.txt
6+
.long 0
7+
#--- b.txt
8+
.long 1
9+
#--- gen
10+
cat a.txt b.txt
11+
.endif
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
; RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
2+
; RUN: diff -u %S/Inputs/basic.test.expected %t
3+
4+
;--- a.txt
5+
@a = global i32 0
6+
;--- b.txt
7+
@b = global i32 0
8+
;--- gen
9+
cat a.txt
10+
echo ';--- b.ll'
11+
cat b.txt
12+
13+
;--- a.ll
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
2+
# RUN: diff -u %t %s
3+
4+
# CHECK: stdout is empty; forgot -o - ?
5+
6+
.ifdef GEN
7+
#--- a.txt
8+
.long 0
9+
#--- b.txt
10+
.long 1
11+
#--- gen
12+
true
13+
.endif
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
2+
3+
# CHECK: 'gen' does not exist
4+
5+
.ifdef GEN
6+
#--- a.txt
7+
.endif
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
2+
3+
# CHECK: log
4+
# CHECK-NEXT: 'gen' failed
5+
6+
.ifdef GEN
7+
#--- gen
8+
echo log >&2
9+
false # gen fails due to sh -e
10+
true
11+
.endif
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
2+
3+
# CHECK: 'gen' should be followed by another part (---) or .endif
4+
5+
#--- a.txt
6+
.long 0
7+
#--- gen
8+
cat a.txt
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import platform
2+
3+
# Mark the tests governed by this config as unsupported on Windows hosts.
host_is_windows = platform.system() == "Windows"
if host_is_windows:
    config.unsupported = True

llvm/test/tools/llvm-dwarfdump/X86/formclass4.s

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,24 @@
1-
# Source:
2-
# struct e {
3-
# enum {} f[16384];
4-
# short g;
5-
# };
6-
# e foo() {
7-
# auto E = new e;
8-
# return *E;
9-
# }
10-
# Compile with:
11-
# clang -O2 -gdwarf-4 -S a.cpp -o a4.s
12-
131
# RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o %t.o
142
# RUN: llvm-dwarfdump -debug-info -name g %t.o | FileCheck %s
153

164
# CHECK: DW_TAG_member
175
# CHECK: DW_AT_name ("g")
186
# CHECK: DW_AT_data_member_location (0x4000)
197

8+
.ifdef GEN
9+
#--- a.cpp
10+
struct e {
11+
enum {} f[16384];
12+
short g;
13+
};
14+
e foo() {
15+
auto E = new e;
16+
return *E;
17+
}
18+
#--- gen
19+
clang --target=x86_64-apple-macosx -O2 -gdwarf-4 -S a.cpp -o -
20+
.endif
21+
2022
.section __TEXT,__text,regular,pure_instructions
2123
.macosx_version_min 10, 14
2224
.globl __Z3foov ## -- Begin function _Z3foov

llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
# RUN: llvm-mc < %s -filetype obj -triple x86_64 -o - \
22
# RUN: | llvm-dwarfdump - | FileCheck %s
33

4-
# Generated from:
5-
#
6-
# struct t1 { };
7-
# t1 v1;
8-
#
9-
# $ clang++ -S -g -fdebug-types-section -gsplit-dwarf -o test.5.split.s -gdwarf-5 -g
10-
114
# CHECK: DW_TAG_variable
125
# CHECK: DW_AT_type ({{.*}} "t1")
136

7+
.ifdef GEN
8+
#--- test.cpp
9+
struct t1 { };
10+
t1 v1;
11+
#--- gen
12+
clang++ --target=x86_64-linux -S -g -fdebug-types-section -gsplit-dwarf -gdwarf-5 test.cpp -o -
13+
.endif
1414
.text
1515
.file "test.cpp"
1616
.section .debug_types.dwo,"e",@progbits

llvm/utils/update_test_body.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
#!/usr/bin/env python3
2+
"""Generate test body using split-file and a custom script.
3+
4+
The script will prepare extra files with `split-file`, invoke `gen`, and then
5+
rewrite the part after `gen` with its stdout.
6+
7+
https://llvm.org/docs/TestingGuide.html#elaborated-tests
8+
9+
Example:
10+
PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.s
11+
"""
12+
import argparse
13+
import contextlib
14+
import os
15+
import re
16+
import subprocess
17+
import sys
18+
import tempfile
19+
20+
21+
@contextlib.contextmanager
def cd(directory):
    """Run the body of a ``with`` statement with *directory* as the cwd.

    The working directory that was current on entry is restored on exit,
    including when the body raises.
    """
    previous = os.getcwd()
    os.chdir(directory)
    try:
        yield
    finally:
        # Always go back, otherwise later relative paths would resolve
        # against the directory we switched into.
        os.chdir(previous)
29+
30+
31+
def process(args, path):
    """Regenerate the body of the test file at *path* from its ``gen`` part.

    The file is read up to and including the line that ends the ``gen`` part
    (the next ``---`` separator after ``gen``, or a line starting with
    ``.endif``). Those lines are piped to ``split-file`` to materialize the
    parts in a temporary directory, ``gen`` is run there with ``sh -eu``, and
    the file is rewritten as the kept lines followed by the stdout of ``gen``.

    Args:
        args: parsed command-line options; only ``args.shell`` is read. When
            it is true, a shell is started in the temporary directory instead
            of running ``gen`` and the file is left untouched.
        path: path of the test file to update in place.

    Returns:
        0 on success; 1 if the file has no usable ``gen`` part, ``split-file``
        fails, or ``gen`` prints nothing; the exit status of ``gen`` if it
        fails. Diagnostics are written to stderr.
    """
    prolog = []
    seen_gen = False
    with open(path) as f:
        for line in f:
            line = line.rstrip()
            prolog.append(line)
            if (seen_gen and re.match(r"(.|//)---", line)) or line.startswith(".endif"):
                break
            # The part must be named exactly 'gen'; a prefix match would also
            # accept parts such as 'gen.txt' or 'generated.s'.
            if re.fullmatch(r"(.|//)--- gen", line):
                seen_gen = True
        else:
            # Reached end of file without finding what terminates 'gen'.
            print(
                "'gen' should be followed by another part (---) or .endif",
                file=sys.stderr,
            )
            return 1

    if not seen_gen:
        print("'gen' does not exist", file=sys.stderr)
        return 1
    with tempfile.TemporaryDirectory(prefix="update_test_body_") as tmpdir:
        # If the last line starts with ".endif", remove it before handing the
        # text to split-file.
        parts = prolog[:-1] if prolog[-1].startswith(".endif") else prolog
        try:
            subprocess.run(
                ["split-file", "-", tmpdir],
                input="\n".join(parts).encode(),
                capture_output=True,
                check=True,
            )
        except subprocess.CalledProcessError as ex:
            sys.stderr.write(ex.stderr.decode())
            return 1
        with cd(tmpdir):
            if args.shell:
                print(f"invoke shell in the temporary directory '{tmpdir}'")
                subprocess.run([os.environ.get("SHELL", "sh")])
                return 0

            sub = subprocess.run(
                ["sh", "-eu", "gen"],
                capture_output=True,
                # Don't encode the directory information to the Clang output.
                # Remove unneeded details (.ident) as well.
                env=dict(
                    os.environ,
                    CCC_OVERRIDE_OPTIONS="#^-fno-ident",
                    PWD="/proc/self/cwd",
                ),
            )
            # Forward whatever 'gen' wrote to stderr, even on success.
            sys.stderr.write(sub.stderr.decode())
            if sub.returncode != 0:
                print("'gen' failed", file=sys.stderr)
                return sub.returncode
            if not sub.stdout:
                print("stdout is empty; forgot -o - ?", file=sys.stderr)
                return 1
            content = sub.stdout.decode()

    with open(path, "w") as f:
        # Print lines up to '.endif'.
        print("\n".join(prolog), file=f)
        # Then print the stdout of 'gen'.
        f.write(content)
    # Report success explicitly so callers comparing against 0 keep going.
    return 0
97+
98+
99+
# Command-line driver: update every listed test file in order and stop at the
# first one that fails, exiting with that file's status.
parser = argparse.ArgumentParser(
    description="Generate test body using split-file and a custom script"
)
parser.add_argument("files", nargs="+")
parser.add_argument(
    "--shell", action="store_true", help="invoke shell instead of 'gen'"
)
args = parser.parse_args()
for path in args.files:
    retcode = process(args, path)
    # Treat both 0 and None as success so the remaining files are still
    # processed; `None != 0` is true and would end the run after one file.
    if retcode:
        sys.exit(retcode)

0 commit comments

Comments
 (0)