Skip to content

Commit 12285cc

Browse files
authored
[DWARF] Use ULEB128 and not just one byte for directory indices (#109067)
According to the standard, `DW_LNCT_directory_index` can be `data1`, `data2`, or `udata` (see 6.2.4.1). The code was using `data1`, which limits the number of directories to 256 even though the variable holding the directory index is a `uint64_t`. `dsymutil` was hitting an assertion when trying to write directory indices higher than 255. Modify the classic and the parallel DWARF linkers to use `udata` and encode the directory indices as ULEB128, and provide a test that has more than 256 directories to check that the changes work as expected. For people who were using `dsymutil` with CUs that had between 128 and 256 directories, this means that those indices (128 and above) will now use 2 bytes instead of just one.
1 parent 26029d7 commit 12285cc

File tree

3 files changed

+217
-5
lines changed

3 files changed

+217
-5
lines changed

llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -933,7 +933,7 @@ void DwarfStreamer::emitLineTablePrologueV5IncludeAndFileTable(
933933
LineSectionSize += MS->emitULEB128IntValue(StrForm);
934934

935935
LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_directory_index);
936-
LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_FORM_data1);
936+
LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_FORM_udata);
937937

938938
if (HasChecksums) {
939939
LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_MD5);
@@ -952,8 +952,7 @@ void DwarfStreamer::emitLineTablePrologueV5IncludeAndFileTable(
952952
// file_names (sequence of file name entries).
953953
for (auto File : P.FileNames) {
954954
emitLineTableString(P, File.Name, DebugStrPool, DebugLineStrPool);
955-
MS->emitInt8(File.DirIdx);
956-
LineSectionSize += 1;
955+
LineSectionSize += MS->emitULEB128IntValue(File.DirIdx);
957956
if (HasChecksums) {
958957
MS->emitBinaryData(
959958
StringRef(reinterpret_cast<const char *>(File.Checksum.data()),

llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ class DebugLineSectionEmitter {
215215
encodeULEB128(FileNameForm, Section.OS);
216216

217217
encodeULEB128(dwarf::DW_LNCT_directory_index, Section.OS);
218-
encodeULEB128(dwarf::DW_FORM_data1, Section.OS);
218+
encodeULEB128(dwarf::DW_FORM_udata, Section.OS);
219219

220220
if (HasChecksums) {
221221
encodeULEB128(dwarf::DW_LNCT_MD5, Section.OS);
@@ -242,7 +242,7 @@ class DebugLineSectionEmitter {
242242
// A null-terminated string containing the full or relative path name of a
243243
// source file.
244244
Section.emitString(FileNameForm, *FileNameStr);
245-
Section.emitIntVal(File.DirIdx, 1);
245+
encodeULEB128(File.DirIdx, Section.OS);
246246

247247
if (HasChecksums) {
248248
assert((File.Checksum.size() == 16) &&
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
# RUN: rm -rf %t && mkdir -p %t
2+
# RUN: split-file %s %t
3+
# RUN: %python %t/all.py > %t/all.ll
4+
# RUN: sed 's@---TEMPORARY_DIR---@%{/t:regex_replacement}@' %t/debug.map.template > %t/debug.map
5+
# RUN: %llc_dwarf -mtriple x86_64-apple-macosx10.4.0 -o %t/all.o -filetype=obj %t/all.ll
6+
# RUN: dsymutil -f -y %t/debug.map -o - | llvm-dwarfdump -debug-line - | FileCheck %s
7+
# RUN: dsymutil --linker parallel -f -y %t/debug.map -o - | llvm-dwarfdump -debug-line - | tee %t/output.txt | FileCheck %s
8+
9+
# CHECK: include_directories[255] = "/tmp/tmp.0HPkdttdoU/d254"
10+
# CHECK-NEXT: include_directories[256] = "/tmp/tmp.0HPkdttdoU/d255"
11+
# CHECK-NEXT: include_directories[257] = "/tmp/tmp.0HPkdttdoU/d256"
12+
13+
# CHECK: dir_index: 255
14+
# CHECK: dir_index: 256
15+
# CHECK: dir_index: 257
16+
17+
# Original file generated doing the following (fish shell):
18+
# - for cnt in (seq 0 256); mkdir -p d$cnt ; printf "void func$cnt() {}\n#define FUNC$cnt func$cnt()\n" >> d$cnt/f$cnt.c ; end
19+
# - for cnt in (seq 0 256); printf "#include \"f$cnt.c\"\n" >> all.c ; end
20+
# - printf "void all() {\n" >> all.c
21+
# - for cnt in (seq 0 256); printf "FUNC$cnt;\n" >> all.c ; end
22+
# - printf "}\n" >> all.c
23+
# - clang -target x86_64-apple-macos -S -emit-llvm -gdwarf-5 -o all.ll all.c (for cnt in (seq 0 256); echo "-Id$cnt"; end)
24+
# - Edit all.ll manually and change all DIFile so the directory in filename is
25+
# moved into the directory field.
26+
# - Transformed into Python manually.
27+
28+
#--- all.py
29+
import math
30+
import string
31+
32+
PROLOGUE = string.Template("""\
33+
; ModuleID = 'all.c'
34+
source_filename = "all.c"
35+
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
36+
target triple = "x86_64-apple-macosx10.4.0"
37+
""")
38+
39+
FUNCTION = string.Template("""\
40+
; Function Attrs: noinline nounwind optnone uwtable
41+
define void @func$idx() #0 !dbg !$dbg_reference_subprogram {
42+
ret void, !dbg !$dbg_reference_location_ret
43+
}
44+
""")
45+
46+
ALL_FUNCTION_PROLOGUE = string.Template("""\
47+
; Function Attrs: noinline nounwind optnone uwtable
48+
define void @all() #0 !dbg !$dbg_reference_subprogram {
49+
""")
50+
51+
ALL_FUNCTION_CALL = string.Template("""\
52+
call void @func$idx(), !dbg !$dbg_reference_location_call
53+
""")
54+
55+
ALL_FUNCTION_EPILOGUE = string.Template("""\
56+
ret void, !dbg !$dbg_reference_location_ret
57+
}
58+
""")
59+
60+
DWARF_PROLOGUE = string.Template("""\
61+
attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" "tune-cpu"="generic" }
62+
63+
!llvm.dbg.cu = !{!0}
64+
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
65+
!llvm.ident = !{!8}
66+
67+
!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 18.1.6 (CentOS 18.1.6-3.el9)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
68+
!1 = !DIFile(filename: "all.c", directory: "/tmp/tmp.0HPkdttdoU", checksumkind: CSK_MD5, checksum: "8b5068f097f0c272ddc808ed2d82cb12")
69+
!2 = !{i32 7, !"Dwarf Version", i32 5}
70+
!3 = !{i32 2, !"Debug Info Version", i32 3}
71+
!4 = !{i32 1, !"wchar_size", i32 4}
72+
!5 = !{i32 8, !"PIC Level", i32 2}
73+
!6 = !{i32 7, !"uwtable", i32 2}
74+
!7 = !{i32 7, !"frame-pointer", i32 2}
75+
!8 = !{!"clang version 18.1.6 (CentOS 18.1.6-3.el9)"}
76+
""")
77+
78+
DWARF_FUNCTION_WITH_TYPE = string.Template("""\
79+
!$dbg_reference_subprogram = distinct !DISubprogram(name: "func$idx", scope: !$dbg_reference_file, file: !$dbg_reference_file, line: 1, type: !11, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0)
80+
!$dbg_reference_file = !DIFile(filename: "f$idx.c", directory: "/tmp/tmp.0HPkdttdoU/d$idx", checksumkind: CSK_MD5, checksum: "01234567890123456789012345678901")
81+
!11 = !DISubroutineType(types: !12)
82+
!12 = !{null}
83+
!$dbg_reference_location = !DILocation(line: 1, column: $column, scope: !$dbg_reference_subprogram)
84+
""")
85+
86+
DWARF_FUNCTION = string.Template("""\
87+
!$dbg_reference_subprogram = distinct !DISubprogram(name: "func$idx", scope: !$dbg_reference_file, file: !$dbg_reference_file, line: 1, type: !11, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0)
88+
!$dbg_reference_file = !DIFile(filename: "f$idx.c", directory: "/tmp/tmp.0HPkdttdoU/d$idx", checksumkind: CSK_MD5, checksum: "01234567890123456789012345678901")
89+
!$dbg_reference_location = !DILocation(line: 1, column: $column, scope: !$dbg_reference_subprogram)
90+
""")
91+
92+
DWARF_ALL_FUNCTION_PROLOGUE = string.Template("""\
93+
!$dbg_reference_subprogram = distinct !DISubprogram(name: "all", scope: !1, file: !1, line: $line_number, type: !11, scopeLine: $line_number, spFlags: DISPFlagDefinition, unit: !0)
94+
""")
95+
96+
DWARF_ALL_FUNCTION_LOCATION = string.Template("""\
97+
!$dbg_reference_location = !DILocation(line: $line_number, column: 1, scope: !$dbg_reference_subprogram)
98+
""")
99+
100+
NUM_FUNCS = 257
101+
102+
dbg_reference_subprogram = 9
103+
dbg_reference_file = 10
104+
dbg_reference_location = 13
105+
column_base = 15
106+
functions = []
107+
dwarf_subprograms = []
108+
109+
first = True
110+
for idx in range(NUM_FUNCS):
111+
functions.append(
112+
FUNCTION.substitute(
113+
idx=idx,
114+
dbg_reference_subprogram=dbg_reference_subprogram,
115+
dbg_reference_location_ret=dbg_reference_location,
116+
)
117+
)
118+
if first:
119+
dwarf_subprograms.append(
120+
DWARF_FUNCTION_WITH_TYPE.substitute(
121+
idx=idx,
122+
dbg_reference_subprogram=dbg_reference_subprogram,
123+
dbg_reference_file=dbg_reference_file,
124+
dbg_reference_location=dbg_reference_location,
125+
column=column_base,
126+
)
127+
)
128+
else:
129+
dwarf_subprograms.append(
130+
DWARF_FUNCTION.substitute(
131+
idx=idx,
132+
dbg_reference_subprogram=dbg_reference_subprogram,
133+
dbg_reference_file=dbg_reference_file,
134+
dbg_reference_location=dbg_reference_location,
135+
column=column_base + math.floor(math.log10(idx)),
136+
)
137+
)
138+
139+
dbg_reference_subprogram += 5 if first else 3
140+
dbg_reference_file += 5 if first else 3
141+
dbg_reference_location += 3
142+
first = False
143+
144+
dbg_reference_location = dbg_reference_subprogram + 1
145+
line_number = 258
146+
all_function = []
147+
dwarf_all_subprogram = []
148+
149+
all_function.append(
150+
ALL_FUNCTION_PROLOGUE.substitute(
151+
dbg_reference_subprogram=dbg_reference_subprogram
152+
)
153+
)
154+
dwarf_all_subprogram.append(
155+
DWARF_ALL_FUNCTION_PROLOGUE.substitute(
156+
dbg_reference_subprogram=dbg_reference_subprogram,
157+
line_number=line_number
158+
)
159+
)
160+
line_number += 1
161+
162+
for idx in range(NUM_FUNCS):
163+
all_function.append(
164+
ALL_FUNCTION_CALL.substitute(
165+
idx=idx,
166+
dbg_reference_location_call=dbg_reference_location,
167+
)
168+
)
169+
dwarf_all_subprogram.append(
170+
DWARF_ALL_FUNCTION_LOCATION.substitute(
171+
dbg_reference_location=dbg_reference_location,
172+
line_number=line_number,
173+
dbg_reference_subprogram=dbg_reference_subprogram,
174+
)
175+
)
176+
177+
dbg_reference_location += 1
178+
line_number += 1
179+
180+
all_function.append(
181+
ALL_FUNCTION_EPILOGUE.substitute(
182+
dbg_reference_location_ret=dbg_reference_location
183+
)
184+
)
185+
dwarf_all_subprogram.append(
186+
DWARF_ALL_FUNCTION_LOCATION.substitute(
187+
dbg_reference_location=dbg_reference_location,
188+
line_number=line_number,
189+
dbg_reference_subprogram=dbg_reference_subprogram,
190+
)
191+
)
192+
193+
print(PROLOGUE.substitute())
194+
for function in functions:
195+
print(function)
196+
for all_function_piece in all_function:
197+
print(all_function_piece, end='')
198+
print()
199+
print(DWARF_PROLOGUE.substitute(), end='')
200+
for dwarf_subprogram in dwarf_subprograms:
201+
print(dwarf_subprogram, end='')
202+
for dwarf_all_subprogram_piece in dwarf_all_subprogram:
203+
print(dwarf_all_subprogram_piece, end='')
204+
print()
205+
206+
#--- debug.map.template
207+
---
208+
triple: 'x86_64-apple-darwin'
209+
objects:
210+
- filename: ---TEMPORARY_DIR---/all.o
211+
symbols:
212+
- { sym: _all, objAddr: 0x0, binAddr: 0x0, size: 0x0 }
213+
...

0 commit comments

Comments
 (0)