[LLD] Tombstone LocalTU entry in .debug_names #70701

ayermolo · 2023-10-30T18:37:30Z

By default LLD uses 0x0... as a tombstone value. For Type Units this is a valid
address. Changed so that UINT{32,64}_MAX value is used for tombstone. The
former is for DWARF32, and latter for DWARF64. The default for LLVM is 32 bit DWARF.
Although 64 bit DWARF is supported in LLVM, it's not widely used. The value was chosen
because it is not a valid offset.

This was introduced as a change to DWARF6 spec:
https://dwarfstd.org/issues/231013.1.html

By default LLD uses 0x0... as a tombstone value. For Type Units this is a valid address. Changed so that MAX value is used for tombstone. This was introduced as a change to DWARF6 spec: https://dwarfstd.org/issues/231013.1.html

llvmbot · 2023-10-30T18:38:35Z

@llvm/pr-subscribers-lld-elf

@llvm/pr-subscribers-lld

Author: Alexander Yermolovich (ayermolo)

Changes

By default LLD uses 0x0... as a tombstone value. For Type Units this is a valid
address. Changed so that MAX value is used for tombstone.

This was introduced as a change to DWARF6 spec:
https://dwarfstd.org/issues/231013.1.html

Patch is 86.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70701.diff

8 Files Affected:

(modified) lld/ELF/InputFiles.cpp (+3-2)
(modified) lld/ELF/InputSection.cpp (+18-3)
(added) lld/test/ELF/Inputs/dwarf5-64-debug-names-type-comdat-helper.s (+397)
(added) lld/test/ELF/Inputs/dwarf5-64-debug-names-type-comdat-main.s (+384)
(added) lld/test/ELF/Inputs/dwarf5-debug-names-type-comdat-helper.s (+410)
(added) lld/test/ELF/Inputs/dwarf5-debug-names-type-comdat-main.s (+397)
(added) lld/test/ELF/x86-64-dwarf5-64-debug-names-type-comdat.test (+20)
(added) lld/test/ELF/x86-64-dwarf5-debug-names-type-comdat.test (+10)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index a0d4be8ff9885b0..9237cd451f2e568 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1158,10 +1158,11 @@ void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) {
     StringRef name(stringTable.data() + eSym.st_name);
 
     symbols[i] = reinterpret_cast<Symbol *>(locals + i);
-    if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded)
+    if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded) {
+      StringRef name = CHECK(eSym.getName(stringTable), this);
       new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type,
                                  /*discardedSecIdx=*/secIdx);
-    else
+    } else
       new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type,
                                eSym.st_value, eSym.st_size, sec);
     symbols[i]->partition = 1;
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 02394cbae95d557..d839e7274763f85 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -20,8 +20,6 @@
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/xxhash.h"
-#include <algorithm>
-#include <mutex>
 #include <vector>
 
 using namespace llvm;
@@ -888,6 +886,7 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
   const bool isDebug = isDebugSection(*this);
   const bool isDebugLocOrRanges =
       isDebug && (name == ".debug_loc" || name == ".debug_ranges");
+  const bool isDebugNames = isDebug && name == ".debug_names";
   const bool isDebugLine = isDebug && name == ".debug_line";
   std::optional<uint64_t> tombstone;
   for (const auto &patAndValue : llvm::reverse(config->deadRelocInNonAlloc))
@@ -918,7 +917,8 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
       continue;
 
     if (tombstone ||
-        (isDebug && (type == target.symbolicRel || expr == R_DTPREL))) {
+        ((isDebug && (type == target.symbolicRel || expr == R_DTPREL))) ||
+        isDebugNames) {
       // Resolve relocations in .debug_* referencing (discarded symbols or ICF
       // folded section symbols) to a tombstone value. Resolving to addend is
       // unsatisfactory because the result address range may collide with a
@@ -948,6 +948,21 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
       // TODO To reduce disruption, we use 0 instead of -1 as the tombstone
       // value. Enable -1 in a future release.
       auto *ds = dyn_cast<Defined>(&sym);
+      if (isDebugNames && dyn_cast<Undefined>(&sym)) {
+        uint64_t maxVal = 0;
+        switch (type) {
+        case R_X86_64_64:
+          maxVal = llvm::maxUIntN(64);
+          break;
+        case R_X86_64_32:
+          maxVal = llvm::maxUIntN(32);
+          break;
+        default:
+          llvm_unreachable("Unsupported relocation type in .debug_names.");
+        }
+        target.relocateNoSym(bufLoc, type, SignExtend64<bits>(maxVal));
+        continue;
+      }
       if (!sym.getOutputSection() || (ds && ds->folded && !isDebugLine)) {
         // If -z dead-reloc-in-nonalloc= is specified, respect it.
         const uint64_t value = tombstone ? SignExtend64<bits>(*tombstone)
diff --git a/lld/test/ELF/Inputs/dwarf5-64-debug-names-type-comdat-helper.s b/lld/test/ELF/Inputs/dwarf5-64-debug-names-type-comdat-helper.s
new file mode 100644
index 000000000000000..1c046bc1520dc96
--- /dev/null
+++ b/lld/test/ELF/Inputs/dwarf5-64-debug-names-type-comdat-helper.s
@@ -0,0 +1,397 @@
+	.text
+	.file	"helper.cpp"
+	.globl	_Z3foov                         # -- Begin function _Z3foov
+	.p2align	4, 0x90
+	.type	_Z3foov,@function
+_Z3foov:                                # @_Z3foov
+.Lfunc_begin0:
+	.file	0 "/home/ayermolo/local/tasks/T138552329/typeDedupSmall" "helper.cpp" md5 0x305ec66c221c583021f8375b300e2591
+	.loc	0 2 0                           # helper.cpp:2:0
+	.cfi_startproc
+# %bb.0:                                # %entry
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register %rbp
+.Ltmp0:
+	.loc	0 4 3 prologue_end              # helper.cpp:4:3
+	xorl	%eax, %eax
+	.loc	0 4 3 epilogue_begin is_stmt 0  # helper.cpp:4:3
+	popq	%rbp
+	.cfi_def_cfa %rsp, 8
+	retq
+.Ltmp1:
+.Lfunc_end0:
+	.size	_Z3foov, .Lfunc_end0-_Z3foov
+	.cfi_endproc
+                                        # -- End function
+	.file	1 "." "header.h" md5 0x53699580704254cb1dd2a83230f8a7ea
+	.section	.debug_info,"G",@progbits,1175092228111723119,comdat
+.Ltu_begin0:
+	.long	4294967295                      # DWARF64 Mark
+	.quad	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.short	5                               # DWARF version number
+	.byte	2                               # DWARF Unit Type
+	.byte	8                               # Address Size (in bytes)
+	.quad	.debug_abbrev                   # Offset Into Abbrev. Section
+	.quad	1175092228111723119             # Type Signature
+	.quad	59                              # Type DIE Offset
+	.byte	1                               # Abbrev [1] 0x28:0x2d DW_TAG_type_unit
+	.short	33                              # DW_AT_language
+	.quad	.Lline_table_start0             # DW_AT_stmt_list
+	.quad	.Lstr_offsets_base0             # DW_AT_str_offsets_base
+	.byte	2                               # Abbrev [2] 0x3b:0x10 DW_TAG_structure_type
+	.byte	5                               # DW_AT_calling_convention
+	.byte	9                               # DW_AT_name
+	.byte	8                               # DW_AT_byte_size
+	.byte	1                               # DW_AT_decl_file
+	.byte	1                               # DW_AT_decl_line
+	.byte	3                               # Abbrev [3] 0x41:0x9 DW_TAG_member
+	.byte	7                               # DW_AT_name
+	.long	75                              # DW_AT_type
+	.byte	1                               # DW_AT_decl_file
+	.byte	2                               # DW_AT_decl_line
+	.byte	0                               # DW_AT_data_member_location
+	.byte	0                               # End Of Children Mark
+	.byte	4                               # Abbrev [4] 0x4b:0x5 DW_TAG_pointer_type
+	.long	80                              # DW_AT_type
+	.byte	5                               # Abbrev [5] 0x50:0x4 DW_TAG_base_type
+	.byte	8                               # DW_AT_name
+	.byte	6                               # DW_AT_encoding
+	.byte	1                               # DW_AT_byte_size
+	.byte	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                               # Abbreviation Code
+	.byte	65                              # DW_TAG_type_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	114                             # DW_AT_str_offsets_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	2                               # Abbreviation Code
+	.byte	19                              # DW_TAG_structure_type
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	54                              # DW_AT_calling_convention
+	.byte	11                              # DW_FORM_data1
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	11                              # DW_AT_byte_size
+	.byte	11                              # DW_FORM_data1
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	3                               # Abbreviation Code
+	.byte	13                              # DW_TAG_member
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	56                              # DW_AT_data_member_location
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	4                               # Abbreviation Code
+	.byte	15                              # DW_TAG_pointer_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	5                               # Abbreviation Code
+	.byte	36                              # DW_TAG_base_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	62                              # DW_AT_encoding
+	.byte	11                              # DW_FORM_data1
+	.byte	11                              # DW_AT_byte_size
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	6                               # Abbreviation Code
+	.byte	17                              # DW_TAG_compile_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	37                              # DW_AT_producer
+	.byte	37                              # DW_FORM_strx1
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	114                             # DW_AT_str_offsets_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	27                              # DW_AT_comp_dir
+	.byte	37                              # DW_FORM_strx1
+	.byte	17                              # DW_AT_low_pc
+	.byte	27                              # DW_FORM_addrx
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	115                             # DW_AT_addr_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	7                               # Abbreviation Code
+	.byte	46                              # DW_TAG_subprogram
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	17                              # DW_AT_low_pc
+	.byte	27                              # DW_FORM_addrx
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	64                              # DW_AT_frame_base
+	.byte	24                              # DW_FORM_exprloc
+	.byte	110                             # DW_AT_linkage_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	8                               # Abbreviation Code
+	.byte	52                              # DW_TAG_variable
+	.byte	0                               # DW_CHILDREN_no
+	.byte	2                               # DW_AT_location
+	.byte	24                              # DW_FORM_exprloc
+	.byte	3                               # DW_AT_name
+	.byte	37                              # DW_FORM_strx1
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	9                               # Abbreviation Code
+	.byte	19                              # DW_TAG_structure_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	60                              # DW_AT_declaration
+	.byte	25                              # DW_FORM_flag_present
+	.byte	105                             # DW_AT_signature
+	.byte	32                              # DW_FORM_ref_sig8
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	0                               # EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	4294967295                      # DWARF64 Mark
+	.quad	.Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit
+.Ldebug_info_start1:
+	.short	5                               # DWARF version number
+	.byte	1                               # DWARF Unit Type
+	.byte	8                               # Address Size (in bytes)
+	.quad	.debug_abbrev                   # Offset Into Abbrev. Section
+	.byte	6                               # Abbrev [6] 0x18:0x4d DW_TAG_compile_unit
+	.byte	0                               # DW_AT_producer
+	.short	33                              # DW_AT_language
+	.byte	1                               # DW_AT_name
+	.quad	.Lstr_offsets_base0             # DW_AT_str_offsets_base
+	.quad	.Lline_table_start0             # DW_AT_stmt_list
+	.byte	2                               # DW_AT_comp_dir
+	.byte	0                               # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.quad	.Laddr_table_base0              # DW_AT_addr_base
+	.byte	7                               # Abbrev [7] 0x3b:0x1c DW_TAG_subprogram
+	.byte	0                               # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.byte	1                               # DW_AT_frame_base
+	.byte	86
+	.byte	3                               # DW_AT_linkage_name
+	.byte	4                               # DW_AT_name
+	.byte	0                               # DW_AT_decl_file
+	.byte	2                               # DW_AT_decl_line
+	.long	87                              # DW_AT_type
+                                        # DW_AT_external
+	.byte	8                               # Abbrev [8] 0x4b:0xb DW_TAG_variable
+	.byte	2                               # DW_AT_location
+	.byte	145
+	.byte	120
+	.byte	6                               # DW_AT_name
+	.byte	0                               # DW_AT_decl_file
+	.byte	3                               # DW_AT_decl_line
+	.long	91                              # DW_AT_type
+	.byte	0                               # End Of Children Mark
+	.byte	5                               # Abbrev [5] 0x57:0x4 DW_TAG_base_type
+	.byte	5                               # DW_AT_name
+	.byte	5                               # DW_AT_encoding
+	.byte	4                               # DW_AT_byte_size
+	.byte	9                               # Abbrev [9] 0x5b:0x9 DW_TAG_structure_type
+                                        # DW_AT_declaration
+	.quad	1175092228111723119             # DW_AT_signature
+	.byte	0                               # End Of Children Mark
+.Ldebug_info_end1:
+	.section	.debug_str_offsets,"",@progbits
+	.long	4294967295                      # DWARF64 Mark
+	.quad	84                              # Length of String Offsets Set
+	.short	5
+	.short	0
+.Lstr_offsets_base0:
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.asciz	"clang version 18.0.0 ([email protected]:ayermolo/llvm-project.git 2a059ae838c2e444f47dc1dcdfefb6fc876a53c1)" # string offset=0
+.Linfo_string1:
+	.asciz	"helper.cpp"                    # string offset=105
+.Linfo_string2:
+	.asciz	"/home/ayermolo/local/tasks/T138552329/typeDedupSmall" # string offset=116
+.Linfo_string3:
+	.asciz	"foo"                           # string offset=169
+.Linfo_string4:
+	.asciz	"_Z3foov"                       # string offset=173
+.Linfo_string5:
+	.asciz	"int"                           # string offset=181
+.Linfo_string6:
+	.asciz	"f"                             # string offset=185
+.Linfo_string7:
+	.asciz	"Foo2a"                         # string offset=187
+.Linfo_string8:
+	.asciz	"c1"                            # string offset=193
+.Linfo_string9:
+	.asciz	"char"                          # string offset=196
+	.section	.debug_str_offsets,"",@progbits
+	.quad	.Linfo_string0
+	.quad	.Linfo_string1
+	.quad	.Linfo_string2
+	.quad	.Linfo_string4
+	.quad	.Linfo_string3
+	.quad	.Linfo_string5
+	.quad	.Linfo_string6
+	.quad	.Linfo_string8
+	.quad	.Linfo_string9
+	.quad	.Linfo_string7
+	.section	.debug_addr,"",@progbits
+	.long	4294967295                      # DWARF64 Mark
+	.quad	.Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+	.short	5                               # DWARF version number
+	.byte	8                               # Address size
+	.byte	0                               # Segment selector size
+.Laddr_table_base0:
+	.quad	.Lfunc_begin0
+.Ldebug_addr_end0:
+	.section	.debug_names,"",@progbits
+	.long	4294967295                      # DWARF64 Mark
+	.quad	.Lnames_end0-.Lnames_start0     # Header: unit length
+.Lnames_start0:
+	.short	5                               # Header: version
+	.short	0                               # Header: padding
+	.long	1                               # Header: compilation unit count
+	.long	1                               # Header: local type unit count
+	.long	0                               # Header: foreign type unit count
+	.long	5                               # Header: bucket count
+	.long	5                               # Header: name count
+	.long	.Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size
+	.long	8                               # Header: augmentation string size
+	.ascii	"LLVM0700"                      # Header: augmentation string
+	.quad	.Lcu_begin0                     # Compilation unit 0
+	.quad	.Ltu_begin0                     # Type unit 0
+	.long	0                               # Bucket 0
+	.long	0                               # Bucket 1
+	.long	0                               # Bucket 2
+	.long	1                               # Bucket 3
+	.long	2                               # Bucket 4
+	.long	193495088                       # Hash in Bucket 3
+	.long	193491849                       # Hash in Bucket 4
+	.long	259227804                       # Hash in Bucket 4
+	.long	2090147939                      # Hash in Bucket 4
+	.long	-1257882357                     # Hash in Bucket 4
+	.quad	.Linfo_string5                  # String in Bucket 3: int
+	.quad	.Linfo_string3                  # String in Bucket 4: foo
+	.quad	.Linfo_string7                  # String in Bucket 4: Foo2a
+	.quad	.Linfo_string9                  # String in Bucket 4: char
+	.quad	.Linfo_string4                  # String in Bucket 4: _Z3foov
+	.quad	.Lnames2-.Lnames_entries0       # Offset in Bucket 3
+	.quad	.Lnames0-.Lnames_entries0       # Offset in...
[truncated]

ayermolo · 2023-10-30T18:45:21Z

Side note. For DWARF64 the check on line 921 is not necessary. Do I need to change relocation in #70515?

dwblaikie · 2023-10-30T20:19:50Z

lld/ELF/InputSection.cpp

@@ -918,7 +917,8 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
      continue;

    if (tombstone ||
-        (isDebug && (type == target.symbolicRel || expr == R_DTPREL))) {
+        ((isDebug && (type == target.symbolicRel || expr == R_DTPREL))) ||


Would be nice to know from @MaskRay (or anyone else) why this type == target.symbolicRel is here, and if there's some way we can generalize it to cover the .debug_names case we're interested in (a R_X86_64_32 relocation) without needing to special case isDebugNames in this condition

Yes. :D As I mentioned, for DWARF64 isDebugNames check is not necessary.

While we're waiting for an expert opinion, if youre bored, you could try removing the type == target.symbolicRel check and running the lld tests, about 5 tests fail - investigating one fo those failures to see what goes wrong in the absence of that condition might help point the way to understanding how to generalize the condition enough to cover the .debug_names situation, without generalizing too far and breaking those tests

Well basically that condition checks if relocation is 64bit: R_X86_64_64 (type == 1).
Primarily used in .debug_addr. Incidentally why DWARF64 tests works without adding isDebugNames to that condition. since for .debug_names relocation type is controlled by DWARF32 vs DWARF64.
Slightly more interesting case is expr == R_DTPREL. Since pretty much all relocs in debug sections are ABS. Looks like that relocation is used to reference TLS symbols. Learned something new today.

Yeah, mechanically I understand what the check does.

What I mean is: Why is this check necessary? What bad things happen (some tests fail, but I haven't looked closely at what they're testing or what the bad behavior looks like) when this check fails/isn't checked? And why is it OK to ignore this check in the .debug_names case? And could we look at that reason and generalize the check so it's not specifically about .debug_names, but about whatever general property is true in the .debug_names case and would be true in other cases?

Basically, it seems the type == target.symbolicRel check is overly restrictive (we have at least one counter example - the .debug_names R_X86_64_32 situation) and I'd like to know ideally how to generically relax this check to account for our situation, but also others that may come up in the future/just not to have a weird .debug_names special case that we don't really understand/can't explain why it's acceptable here, but not there - and layering special cases makes the code harder to understand and maintain.

Ah, I see what you mean. Yeah was going to dig in further today and see how to generalize. You are right it is over restrictive. Ideally it probably should be something like if (tombstone || debug), and then some logic depending on relocation type and probably section. I am hesitant to change current defaults due to downstream tools (cough lldb), breaking or something.

OK I think I understand the history here....
My resources:
https://sourceware.org/pipermail/binutils/2020-May/111357.html
https://lists.llvm.org/pipermail/llvm-dev/2020-May/141885.html
https://reviews.llvm.org/D81784
https://reviews.llvm.org/D84825

TLDR on this whole thing. This code primarily deals with an issue of what to do with symbols in debug sections when text sections they reference get GC/ICF. Those addresses need to be tombsoned. The 0x0 doesn't work in .debug_ranges/.debug_loc because before DWARF5 0x0,0x0 is how those lists ended. The -1 doesn't work because it has special meaning. So first -2 was implemented, but that had issues also, so then it was changed to 1 for loc/ranges and 0 for "everything else".

Except since relocation type allowed (type == target.symbolicRel) is 64 bit "everything else" is .debug_addr section and .debug_info with form DW_FORM_addr. Since in 32 bit dwarf only thing that is 64bit are addresses in .text section. At some point expr == R_DTPREL was added for TLS stuff.

The 32bit relocs were left alone to follow the same path as others and get assigned 0x0 + addend. Seems like mostly for "if it's not broken" reasons.
Although for non-debug non-alloc can have pointer subtraction:
https://lists.llvm.org/pipermail/llvm-dev/2020-May/141918.html

Anyway. IF my understanding is correct I don't see why we can't generalize for all debug sections to

Tombstone to 1 for ranges/loc

MAXINT for .debug_names

0 for everything else.

This will leave current behavior for cases that really matter, GC/ICF, and for relative relocations (32 bit outside of .debug_names), I don't think it should matter?

Hopefully I didn't miss anything. :)

@MaskRay WDYT?

dwblaikie · 2023-10-30T20:20:58Z

lld/ELF/InputSection.cpp

+      if (isDebugNames && dyn_cast<Undefined>(&sym)) {
+        uint64_t maxVal = 0;
+        switch (type) {
+        case R_X86_64_64:
+          maxVal = llvm::maxUIntN(64);
+          break;
+        case R_X86_64_32:
+          maxVal = llvm::maxUIntN(32);
+          break;
+        default:
+          llvm_unreachable("Unsupported relocation type in .debug_names.");
+        }


Be nice to hear from @MaskRay or otherwise about whether we can generalize this in some way.

@ayermolo perhaps you can check the commit history to see how the -1 as tombstone worked - it was committed a few years back, so there might be some hint about how to do that that doesn't require special casing each relocation

dwblaikie · 2023-10-30T20:22:12Z

lld/test/ELF/Inputs/dwarf5-64-debug-names-type-comdat-helper.s

@@ -0,0 +1,397 @@
+	.text


I imagine if you check the other test cases (for things like the DebugLoc/DebugRanges cases) you'll find they don't use full DWARF but probably just some relocations in an otherwise empty/small section - and we should probably test this in a similar way. (if the DebugLoc/DebugRanges tests do use fully formed DWARF, that'll be a surprise to me, but then happy to work with you to see if we can simplify the test(s) as much as possible in that form)

Yeah I can look into simplifying.

Simplified, or does it need to be even further that it's not valid .debug_names section, and just has reloc?

I'd go further, not making it a valid .debug_names section - like the tests in debug-dead-reloc.s (or it could even be added to that - it tests comdat .text already, so it could test comdat .debug_types too) - that tests special cases for .debug_loc and .debug_ranges, for instance, but doesn't have whole valid loclists or range lists.

(but mostly whatever @MaskRay's happy with)

ayermolo · 2023-11-02T19:44:04Z

Updated with more generic version. Step in right direction?

dwblaikie · 2023-11-02T21:34:15Z

lld/ELF/InputSection.cpp

+  if (!tombstone && isDebug)
+    tombstone = debugTombstone;
+  else if (tombstone)
+    tombstone = SignExtend64<bits>(*tombstone);


if this is sign extended to 64 bits, but then we have a 32 bit relocation, I'd expect this fails/errors out?

It is the same as original logic. It would go into if condition if tombstone is specified, and always extend.
bits == 64
So at least on ELF64 this is no-op. So TBH I am not sure why it's there.

template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) { static_assert(B > 0, "Bit width can't be 0."); static_assert(B <= 64, "Bit width out of range."); return int64_t(x << (64 - B)) >> (64 - B); }

dwblaikie · 2023-11-02T21:36:49Z

lld/ELF/InputSection.cpp

+
+      // Extend to 64bit MAX for 64 bit relocations, LocalTU, in .debug_names.
+      if (isDebugNames && type == target.symbolicRel)
+        tombstone = SignExtend64<32>(*tombstone);


I guess this only handles one case of 64 bit relocations, but there might be others? & I'm not sure isDebugNames is the special case here - shouldn't it be known by the relocation, rather than the section the relocation is in?

ok, made it more generic.

dwblaikie · 2023-11-02T21:37:53Z

Updated with more generic version. Step in right direction?

I think it's probably going in the right direction - though I'm not sure we'll be quite able to figure out the right thing without some more lld expertise. But trying to make what progress we can in the interim.

…ames

…debug_names

ayermolo · 2023-11-02T23:49:52Z

Updated with more generic version. Step in right direction?

I think it's probably going in the right direction - though I'm not sure we'll be quite able to figure out the right thing without some more lld expertise. But trying to make what progress we can in the interim.

Anyone else we can rope into this?

…ry in .debug_names

…lTU entry in .debug_names

…ne LocalTU entry in .debug_names

MaskRay · 2023-11-06T00:41:21Z

Sorry for my late reply. IIUC clang -fdebug-types-section -g -gpubnames does not emit .debug_names. #68131 will implement the support (accidentally committed, reverted in f320065). git cherry-pick -n 9bbd2bf654634cd95dd0be7948ec8402c3c76e1e has merge conflicts, so it seems difficult for me to play with the example...

MaskRay · 2023-11-06T00:44:33Z

By default LLD uses 0x0... as a tombstone value. For Type Units this is a valid address. Changed so that MAX value is used for tombstone.

This was introduced as a change to DWARF6 spec: dwarfstd.org/issues/231013.1.html

The description (the draft commit message when the patch eventually lands) can use some clang commands for the curious to play with.

Some information can be extracted from
@dwblaikie's https://discourse.llvm.org/t/debug-names-tombstoning-for-local-type-units/74410/2

$ clang++-tot null.cpp a.cpp b.cpp -g -fdebug-types-section -fuse-ld=lld -gpubnames && llvm-dwarfdump-tot a.out -debug-names
    LocalTU[0]: 0x00000030
 # some comments to explain what this is and how it is undesired can be useful
    LocalTU[0]: 0x00000000

MaskRay · 2023-11-06T00:48:38Z

lld/ELF/InputSection.cpp

@@ -917,8 +925,7 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
    if (expr == R_NONE)
      continue;

-    if (tombstone ||
-        (isDebug && (type == target.symbolicRel || expr == R_DTPREL))) {
+    if (tombstoneValueToUse) {


I think
if (!sym.getOutputSection() || (ds && ds->folded && !isDebugLine)) should be the outer if and the tombstone logic should be the inner if.

ayermolo · 2023-11-06T19:12:23Z

Sorry for my late reply. IIUC clang -fdebug-types-section -g -gpubnames does not emit .debug_names. #68131 will implement the support (accidentally committed, reverted in f320065). git cherry-pick -n 9bbd2bf654634cd95dd0be7948ec8402c3c76e1e has merge conflicts, so it seems difficult for me to play with the example...
Sorry I thought I mentioned somewhere.
#70515 Is a current open PR for it. At least according to GH there are no merge conflicts for it.

…Tombstone LocalTU entry in .debug_names

ayermolo · 2023-11-10T02:02:44Z

@MaskRay ping :)

MaskRay

Code-wise this is close, but I want to see #68131 landed so that I can play with .debug_names tombstone values.

My previous comment still applies: the description should describe the choice of UINT32_MAX and UINT64_MAX, so that a curious reader doesn't have to read several external links to understand the choice.

MaskRay · 2023-11-14T00:11:03Z

lld/test/ELF/Inputs/dwarf5-debug-names-type-comdat.s

@@ -0,0 +1,26 @@
+	.section	.debug_info,"G",@progbits,1175092228111723119,comdat


For newer tests we tend to move away from Inputs/. dwarf5-debug-names-type-comdat.s and the DWARF64 counterpart can be placed into x86-64-dwarf5-64-debug-names-type-comdat.test. Search for .ifdef ERR and llvm-mc --defsym. We can use some assembly preprocessing to deduplicate the input.

MaskRay · 2023-11-14T00:12:33Z

lld/test/ELF/x86-64-dwarf5-debug-names-type-comdat.test

@@ -0,0 +1,16 @@
+// REQUIRES: x86


In newer tests we make the comment (non-RUN non-CHECK) marker outstanding. We typically use # RUN: # CHECK: and ## Regular comments.

MaskRay · 2023-11-14T00:13:17Z

lld/test/ELF/x86-64-dwarf5-64-debug-names-type-comdat.test

@@ -0,0 +1,18 @@
+// REQUIRES: x86


We should place the DWARF64 test into the other file.

https://maskray.me/blog/2021-08-08-toolchain-testing#the-test-checks-at-the-wrong-layer#i-dont-know-an-existing-test-can-be-enhanced applies

dwblaikie · 2023-11-15T16:42:46Z

My previous comment still applies: the description should describe the choice of UINT32_MAX and UINT64_MAX, so that a curious reader doesn't have to read several external links to understand the choice.

Is there a way we could avoid hardcoding this, and just do "max of whatever the relocation size is"? (some earlier versions of this patch had switches over relocation types to try to do this - which seemed a bit verbose/brittle/repetitive - but if there was some way to generalize it tidily, I think that'd be ideal)

ayermolo · 2023-11-29T00:18:24Z

My previous comment still applies: the description should describe the choice of UINT32_MAX and UINT64_MAX, so that a curious reader doesn't have to read several external links to understand the choice.

Is there a way we could avoid hardcoding this, and just do "max of whatever the relocation size is"? (some earlier versions of this patch had switches over relocation types to try to do this - which seemed a bit verbose/brittle/repetitive - but if there was some way to generalize it tidily, I think that'd be ideal)

That is already what is happening. We always extend to 64bit MAX, and value is set by relocation type. 32 bit for DWARF32, and 64bit for DWARF64.

ayermolo · 2023-11-29T01:19:24Z

@MaskRay OK, cleaned up tests.

ayermolo · 2023-12-04T20:02:51Z

@MaskRay ping :)

MaskRay · 2023-12-06T22:35:43Z

Sorry for not chiming in earlier. I think the current description is
probably inadequate, so a future reader would have a hard time to
analyze this use case.

With git log --grep debug_names, I see that #73872 has recently landed
(Dec 4) and now the curious (like I) can redo the experiment.
I am still expecting a concrete example in the patch description (first comment of this PR). #70701 (comment)

I think the description can be rephased like the following:

#73872 added support for ...
.debug_names references local TUs in COMDAT .debug_info.
When the referenced .debug_info is non-prevailing, we leave a value of zero, which is a valid offset within the .debug_info section.

% cat a.cc
...
% cat b.cc
...
% clang++ -g -gpubnames -fdebug-types-section -fuse-ld=lld a.cc b.cc -o old
% llvm-dwarfdump -v old
...
Local Type Unit Offsets [
  LocalTU[0]: 0x00000000  // this is a valid offset within the .debug_info section
...

https://dwarfstd.org/issues/231013.1.html proposes that we use a tombstone value instead.
This patch implements the idea. The new llvm-dwarfdump output will look like:

% llvm-dwarfdump -v new
...
Local Type Unit Offsets [
  LocalTU[0]: 0xffffffff  // tombstone value
...

MaskRay · 2023-12-06T22:37:37Z

lld/ELF/InputSection.cpp

  const bool isDebugLine = isDebug && name == ".debug_line";
-  std::optional<uint64_t> tombstone;
+  std::optional<uint64_t> tombstone = std::nullopt;


An optional defaults to std::nullopt. Just default initialize it.

MaskRay · 2023-12-06T22:38:33Z

lld/test/ELF/x86-64-dwarf5-64-debug-names-type-comdat.test

+# RUN: ld.lld %t.o %t1.o -o %t1
+# RUN: llvm-objdump -s %t1 | FileCheck %s --check-prefix=CHECK32
+
+# Test checks that LLD tombstones TU section that was de-duplicated using COMDAT to the maxium value.


For newer tests we use ## for non-RUN non-CHECK comments.

MaskRay · 2023-12-06T22:38:42Z

lld/test/ELF/x86-64-dwarf5-64-debug-names-type-comdat.test

+# RUN: ld.lld %t.o %t1.o -o %t1
+# RUN: llvm-objdump -s %t1 | FileCheck %s --check-prefix=CHECK64
+
+# Test checks that LLD tombstones TU section that was de-duplicated using COMDAT to the maxium value.


MaskRay · 2023-12-06T22:39:15Z

lld/test/ELF/x86-64-dwarf5-64-debug-names-type-comdat.test

+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux --defsym DWARF32=1 %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux --defsym DWARF32=1 %s -o %t1.o


You can do ld.lld %t.o %t.o.

-triple=x86_64 since there is no Linux specific stuff. We omit the OS to essentially say the test is generic to all ELF OSes.

MaskRay · 2023-12-06T22:40:28Z

lld/test/ELF/x86-64-dwarf5-64-debug-names-type-comdat.test

+.Lnames_end0:
+.endif
+
+# Test generated with clang++ -g2 -gdwarf-5 -gdwarf64 -gpubnames -fdebug-types-section -S and then manually reduced.


This needs a C++ source example.

MaskRay · 2023-12-06T22:58:25Z

lld/ELF/InputSection.cpp

@@ -20,8 +20,7 @@
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/xxhash.h"
-#include <algorithm>
-#include <mutex>


We should keep the headers. mutex is used by std::mutex in this file.

MaskRay · 2023-12-06T23:11:41Z

I think I'd implement the logic quite differently. Let me put up a patch...

MaskRay · 2023-12-07T01:04:44Z

#73872 added support for ...
.debug_names references local TUs in COMDAT .debug_info.
When the referenced .debug_info is non-prevailing, we leave a value of zero, which is a valid offset within the .debug_info section.

% cat a.cc
...
% cat b.cc
...
% clang++ -g -gpubnames -fdebug-types-section -fuse-ld=lld a.cc b.cc -o old
% llvm-dwarfdump -v old
...
Local Type Unit Offsets [
LocalTU[0]: 0x00000000 // this is a valid offset within the .debug_info section
...

https://dwarfstd.org/issues/231013.1.html proposes that we use a tombstone value instead.
This patch implements the idea. The new llvm-dwarfdump output will look like:

% llvm-dwarfdump -v new
...
Local Type Unit Offsets [
LocalTU[0]: 0xffffffff // tombstone value
...

I have created #74686 as an alternative. Thank you for your and @dwblaikie's descriptions to improve my .debug_names understanding :)

[LLD] Tombstone LocalTU entry in .debug_names

d2665cc

By default LLD uses 0x0... as a tombstone value. For Type Units this is a valid address. Changed so that MAX value is used for tombstone. This was introduced as a change to DWARF6 spec: https://dwarfstd.org/issues/231013.1.html

llvmbot added lld lld:ELF labels Oct 30, 2023

fixup! [LLD] Tombstone LocalTU entry in .debug_names

792155d

fixup! fixup! [LLD] Tombstone LocalTU entry in .debug_names

854a281

ayermolo requested review from MaskRay, cmtice and dwblaikie October 30, 2023 19:49

dwblaikie reviewed Oct 30, 2023

View reviewed changes

fixup! fixup! fixup! [LLD] Tombstone LocalTU entry in .debug_names

b716d70

dwblaikie reviewed Nov 2, 2023

View reviewed changes

ayermolo added 2 commits November 2, 2023 16:45

fixup! fixup! fixup! fixup! [LLD] Tombstone LocalTU entry in .debug_n…

19fa33e

…ames

fixup! fixup! fixup! fixup! fixup! [LLD] Tombstone LocalTU entry in .…

a69878b

…debug_names

ayermolo added 3 commits November 2, 2023 16:51

fixup! fixup! fixup! fixup! fixup! fixup! [LLD] Tombstone LocalTU ent…

8e5c0b7

…ry in .debug_names

fixup! fixup! fixup! fixup! fixup! fixup! fixup! [LLD] Tombstone Loca…

be18f4f

…lTU entry in .debug_names

fixup! fixup! fixup! fixup! fixup! fixup! fixup! fixup! [LLD] Tombsto…

e25f409

…ne LocalTU entry in .debug_names

MaskRay reviewed Nov 6, 2023

View reviewed changes

fixup! fixup! fixup! fixup! fixup! fixup! fixup! fixup! fixup! [LLD] …

03afbda

…Tombstone LocalTU entry in .debug_names

MaskRay reviewed Nov 14, 2023

View reviewed changes

SLTozer mentioned this pull request Nov 15, 2023

[DebugInfo] Make DIArgList inherit from Metadata and always unique #72147

Merged

ayermolo added 2 commits November 28, 2023 17:13

Refactored tests. Consolidated into one test

036dbef

Removed redundant test.

1dfdf18

MaskRay reviewed Dec 6, 2023

View reviewed changes

ayermolo mentioned this pull request Dec 8, 2023

[ELF] Change .debug_names tombstone value to UINT32_MAX/UINT64_MAX #74686

Merged

ayermolo closed this Jan 6, 2024

		@@ -0,0 +1,26 @@
		.section .debug_info,"G",@progbits,1175092228111723119,comdat

[LLD] Tombstone LocalTU entry in .debug_names #70701

[LLD] Tombstone LocalTU entry in .debug_names #70701

Conversation

ayermolo commented Oct 30, 2023 • edited Loading

llvmbot commented Oct 30, 2023 • edited Loading

ayermolo commented Oct 30, 2023

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

ayermolo Nov 2, 2023 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

ayermolo commented Nov 2, 2023

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

dwblaikie commented Nov 2, 2023

ayermolo commented Nov 2, 2023

MaskRay commented Nov 6, 2023 • edited Loading

MaskRay commented Nov 6, 2023

Choose a reason for hiding this comment

ayermolo commented Nov 6, 2023

ayermolo commented Nov 10, 2023

MaskRay left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

dwblaikie commented Nov 15, 2023

ayermolo commented Nov 29, 2023

ayermolo commented Nov 29, 2023

ayermolo commented Dec 4, 2023

MaskRay commented Dec 6, 2023 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

MaskRay commented Dec 6, 2023

MaskRay commented Dec 7, 2023

ayermolo commented Oct 30, 2023 •

edited

Loading

llvmbot commented Oct 30, 2023 •

edited

Loading

ayermolo Nov 2, 2023 •

edited

Loading

MaskRay commented Nov 6, 2023 •

edited

Loading

MaskRay commented Dec 6, 2023 •

edited

Loading