-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[LLD] Tombstone LocalTU entry in .debug_names #70701
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
By default LLD uses 0x0... as a tombstone value. For Type Units this is a valid address. Changed so that MAX value is used for tombstone. This was introduced as a change to DWARF6 spec: https://dwarfstd.org/issues/231013.1.html
@llvm/pr-subscribers-lld-elf @llvm/pr-subscribers-lld Author: Alexander Yermolovich (ayermolo) ChangesBy default LLD uses 0x0... as a tombstone value. For Type Units this is a valid This was introduced as a change to DWARF6 spec: Patch is 86.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70701.diff 8 Files Affected:
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index a0d4be8ff9885b0..9237cd451f2e568 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1158,10 +1158,11 @@ void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) {
StringRef name(stringTable.data() + eSym.st_name);
symbols[i] = reinterpret_cast<Symbol *>(locals + i);
- if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded)
+ if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded) {
+ StringRef name = CHECK(eSym.getName(stringTable), this);
new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type,
/*discardedSecIdx=*/secIdx);
- else
+ } else
new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type,
eSym.st_value, eSym.st_size, sec);
symbols[i]->partition = 1;
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 02394cbae95d557..d839e7274763f85 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -20,8 +20,6 @@
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/xxhash.h"
-#include <algorithm>
-#include <mutex>
#include <vector>
using namespace llvm;
@@ -888,6 +886,7 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
const bool isDebug = isDebugSection(*this);
const bool isDebugLocOrRanges =
isDebug && (name == ".debug_loc" || name == ".debug_ranges");
+ const bool isDebugNames = isDebug && name == ".debug_names";
const bool isDebugLine = isDebug && name == ".debug_line";
std::optional<uint64_t> tombstone;
for (const auto &patAndValue : llvm::reverse(config->deadRelocInNonAlloc))
@@ -918,7 +917,8 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
continue;
if (tombstone ||
- (isDebug && (type == target.symbolicRel || expr == R_DTPREL))) {
+ ((isDebug && (type == target.symbolicRel || expr == R_DTPREL))) ||
+ isDebugNames) {
// Resolve relocations in .debug_* referencing (discarded symbols or ICF
// folded section symbols) to a tombstone value. Resolving to addend is
// unsatisfactory because the result address range may collide with a
@@ -948,6 +948,21 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
// TODO To reduce disruption, we use 0 instead of -1 as the tombstone
// value. Enable -1 in a future release.
auto *ds = dyn_cast<Defined>(&sym);
+ if (isDebugNames && dyn_cast<Undefined>(&sym)) {
+ uint64_t maxVal = 0;
+ switch (type) {
+ case R_X86_64_64:
+ maxVal = llvm::maxUIntN(64);
+ break;
+ case R_X86_64_32:
+ maxVal = llvm::maxUIntN(32);
+ break;
+ default:
+ llvm_unreachable("Unsupported relocation type in .debug_names.");
+ }
+ target.relocateNoSym(bufLoc, type, SignExtend64<bits>(maxVal));
+ continue;
+ }
if (!sym.getOutputSection() || (ds && ds->folded && !isDebugLine)) {
// If -z dead-reloc-in-nonalloc= is specified, respect it.
const uint64_t value = tombstone ? SignExtend64<bits>(*tombstone)
diff --git a/lld/test/ELF/Inputs/dwarf5-64-debug-names-type-comdat-helper.s b/lld/test/ELF/Inputs/dwarf5-64-debug-names-type-comdat-helper.s
new file mode 100644
index 000000000000000..1c046bc1520dc96
--- /dev/null
+++ b/lld/test/ELF/Inputs/dwarf5-64-debug-names-type-comdat-helper.s
@@ -0,0 +1,397 @@
+ .text
+ .file "helper.cpp"
+ .globl _Z3foov # -- Begin function _Z3foov
+ .p2align 4, 0x90
+ .type _Z3foov,@function
+_Z3foov: # @_Z3foov
+.Lfunc_begin0:
+ .file 0 "/home/ayermolo/local/tasks/T138552329/typeDedupSmall" "helper.cpp" md5 0x305ec66c221c583021f8375b300e2591
+ .loc 0 2 0 # helper.cpp:2:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp0:
+ .loc 0 4 3 prologue_end # helper.cpp:4:3
+ xorl %eax, %eax
+ .loc 0 4 3 epilogue_begin is_stmt 0 # helper.cpp:4:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp1:
+.Lfunc_end0:
+ .size _Z3foov, .Lfunc_end0-_Z3foov
+ .cfi_endproc
+ # -- End function
+ .file 1 "." "header.h" md5 0x53699580704254cb1dd2a83230f8a7ea
+ .section .debug_info,"G",@progbits,1175092228111723119,comdat
+.Ltu_begin0:
+ .long 4294967295 # DWARF64 Mark
+ .quad .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 2 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .quad .debug_abbrev # Offset Into Abbrev. Section
+ .quad 1175092228111723119 # Type Signature
+ .quad 59 # Type DIE Offset
+ .byte 1 # Abbrev [1] 0x28:0x2d DW_TAG_type_unit
+ .short 33 # DW_AT_language
+ .quad .Lline_table_start0 # DW_AT_stmt_list
+ .quad .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .byte 2 # Abbrev [2] 0x3b:0x10 DW_TAG_structure_type
+ .byte 5 # DW_AT_calling_convention
+ .byte 9 # DW_AT_name
+ .byte 8 # DW_AT_byte_size
+ .byte 1 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .byte 3 # Abbrev [3] 0x41:0x9 DW_TAG_member
+ .byte 7 # DW_AT_name
+ .long 75 # DW_AT_type
+ .byte 1 # DW_AT_decl_file
+ .byte 2 # DW_AT_decl_line
+ .byte 0 # DW_AT_data_member_location
+ .byte 0 # End Of Children Mark
+ .byte 4 # Abbrev [4] 0x4b:0x5 DW_TAG_pointer_type
+ .long 80 # DW_AT_type
+ .byte 5 # Abbrev [5] 0x50:0x4 DW_TAG_base_type
+ .byte 8 # DW_AT_name
+ .byte 6 # DW_AT_encoding
+ .byte 1 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 65 # DW_TAG_type_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 19 # DW_TAG_structure_type
+ .byte 1 # DW_CHILDREN_yes
+ .byte 54 # DW_AT_calling_convention
+ .byte 11 # DW_FORM_data1
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 13 # DW_TAG_member
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 56 # DW_AT_data_member_location
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 4 # Abbreviation Code
+ .byte 15 # DW_TAG_pointer_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 5 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 6 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 7 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 1 # DW_CHILDREN_yes
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 110 # DW_AT_linkage_name
+ .byte 37 # DW_FORM_strx1
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 8 # Abbreviation Code
+ .byte 52 # DW_TAG_variable
+ .byte 0 # DW_CHILDREN_no
+ .byte 2 # DW_AT_location
+ .byte 24 # DW_FORM_exprloc
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 9 # Abbreviation Code
+ .byte 19 # DW_TAG_structure_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 60 # DW_AT_declaration
+ .byte 25 # DW_FORM_flag_present
+ .byte 105 # DW_AT_signature
+ .byte 32 # DW_FORM_ref_sig8
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long 4294967295 # DWARF64 Mark
+ .quad .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit
+.Ldebug_info_start1:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .quad .debug_abbrev # Offset Into Abbrev. Section
+ .byte 6 # Abbrev [6] 0x18:0x4d DW_TAG_compile_unit
+ .byte 0 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 1 # DW_AT_name
+ .quad .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .quad .Lline_table_start0 # DW_AT_stmt_list
+ .byte 2 # DW_AT_comp_dir
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .quad .Laddr_table_base0 # DW_AT_addr_base
+ .byte 7 # Abbrev [7] 0x3b:0x1c DW_TAG_subprogram
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .byte 3 # DW_AT_linkage_name
+ .byte 4 # DW_AT_name
+ .byte 0 # DW_AT_decl_file
+ .byte 2 # DW_AT_decl_line
+ .long 87 # DW_AT_type
+ # DW_AT_external
+ .byte 8 # Abbrev [8] 0x4b:0xb DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .byte 6 # DW_AT_name
+ .byte 0 # DW_AT_decl_file
+ .byte 3 # DW_AT_decl_line
+ .long 91 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 5 # Abbrev [5] 0x57:0x4 DW_TAG_base_type
+ .byte 5 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 9 # Abbrev [9] 0x5b:0x9 DW_TAG_structure_type
+ # DW_AT_declaration
+ .quad 1175092228111723119 # DW_AT_signature
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end1:
+ .section .debug_str_offsets,"",@progbits
+ .long 4294967295 # DWARF64 Mark
+ .quad 84 # Length of String Offsets Set
+ .short 5
+ .short 0
+.Lstr_offsets_base0:
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "clang version 18.0.0 ([email protected]:ayermolo/llvm-project.git 2a059ae838c2e444f47dc1dcdfefb6fc876a53c1)" # string offset=0
+.Linfo_string1:
+ .asciz "helper.cpp" # string offset=105
+.Linfo_string2:
+ .asciz "/home/ayermolo/local/tasks/T138552329/typeDedupSmall" # string offset=116
+.Linfo_string3:
+ .asciz "foo" # string offset=169
+.Linfo_string4:
+ .asciz "_Z3foov" # string offset=173
+.Linfo_string5:
+ .asciz "int" # string offset=181
+.Linfo_string6:
+ .asciz "f" # string offset=185
+.Linfo_string7:
+ .asciz "Foo2a" # string offset=187
+.Linfo_string8:
+ .asciz "c1" # string offset=193
+.Linfo_string9:
+ .asciz "char" # string offset=196
+ .section .debug_str_offsets,"",@progbits
+ .quad .Linfo_string0
+ .quad .Linfo_string1
+ .quad .Linfo_string2
+ .quad .Linfo_string4
+ .quad .Linfo_string3
+ .quad .Linfo_string5
+ .quad .Linfo_string6
+ .quad .Linfo_string8
+ .quad .Linfo_string9
+ .quad .Linfo_string7
+ .section .debug_addr,"",@progbits
+ .long 4294967295 # DWARF64 Mark
+ .quad .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+ .short 5 # DWARF version number
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+.Laddr_table_base0:
+ .quad .Lfunc_begin0
+.Ldebug_addr_end0:
+ .section .debug_names,"",@progbits
+ .long 4294967295 # DWARF64 Mark
+ .quad .Lnames_end0-.Lnames_start0 # Header: unit length
+.Lnames_start0:
+ .short 5 # Header: version
+ .short 0 # Header: padding
+ .long 1 # Header: compilation unit count
+ .long 1 # Header: local type unit count
+ .long 0 # Header: foreign type unit count
+ .long 5 # Header: bucket count
+ .long 5 # Header: name count
+ .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size
+ .long 8 # Header: augmentation string size
+ .ascii "LLVM0700" # Header: augmentation string
+ .quad .Lcu_begin0 # Compilation unit 0
+ .quad .Ltu_begin0 # Type unit 0
+ .long 0 # Bucket 0
+ .long 0 # Bucket 1
+ .long 0 # Bucket 2
+ .long 1 # Bucket 3
+ .long 2 # Bucket 4
+ .long 193495088 # Hash in Bucket 3
+ .long 193491849 # Hash in Bucket 4
+ .long 259227804 # Hash in Bucket 4
+ .long 2090147939 # Hash in Bucket 4
+ .long -1257882357 # Hash in Bucket 4
+ .quad .Linfo_string5 # String in Bucket 3: int
+ .quad .Linfo_string3 # String in Bucket 4: foo
+ .quad .Linfo_string7 # String in Bucket 4: Foo2a
+ .quad .Linfo_string9 # String in Bucket 4: char
+ .quad .Linfo_string4 # String in Bucket 4: _Z3foov
+ .quad .Lnames2-.Lnames_entries0 # Offset in Bucket 3
+ .quad .Lnames0-.Lnames_entries0 # Offset in...
[truncated]
|
Side note. For DWARF64 the check on line 921 is not necessary. Do I need to change relocation in #70515? |
lld/ELF/InputSection.cpp
Outdated
@@ -918,7 +917,8 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { | |||
continue; | |||
|
|||
if (tombstone || | |||
(isDebug && (type == target.symbolicRel || expr == R_DTPREL))) { | |||
((isDebug && (type == target.symbolicRel || expr == R_DTPREL))) || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be nice to know from @MaskRay (or anyone else) why this type == target.symbolicRel
is here, and if there's some way we can generalize it to cover the .debug_names
case we're interested in (a R_X86_64_32
relocation) without needing to special case isDebugNames
in this condition
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. :D As I mentioned, for DWARF64 isDebugNames check is not necessary.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While we're waiting for an expert opinion, if youre bored, you could try removing the type == target.symbolicRel
check and running the lld tests, about 5 tests fail - investigating one fo those failures to see what goes wrong in the absence of that condition might help point the way to understanding how to generalize the condition enough to cover the .debug_names
situation, without generalizing too far and breaking those tests
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well basically that condition checks if relocation is 64bit: R_X86_64_64 (type == 1).
Primarily used in .debug_addr. Incidentally why DWARF64 tests works without adding isDebugNames to that condition. since for .debug_names relocation type is controlled by DWARF32 vs DWARF64.
Slightly more interesting case is expr == R_DTPREL. Since pretty much all relocs in debug sections are ABS. Looks like that relocation is used to reference TLS symbols. Learned something new today.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, mechanically I understand what the check does.
What I mean is: Why is this check necessary? What bad things happen (some tests fail, but I haven't looked closely at what they're testing or what the bad behavior looks like) when this check fails/isn't checked? And why is it OK to ignore this check in the .debug_names
case? And could we look at that reason and generalize the check so it's not specifically about .debug_names
, but about whatever general property is true in the .debug_names
case and would be true in other cases?
Basically, it seems the type == target.symbolicRel
check is overly restrictive (we have at least one counter example - the .debug_names
R_X86_64_32
situation) and I'd like to know ideally how to generically relax this check to account for our situation, but also others that may come up in the future/just not to have a weird .debug_names
special case that we don't really understand/can't explain why it's acceptable here, but not there - and layering special cases makes the code harder to understand and maintain.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I see what you mean. Yeah was going to dig in further today and see how to generalize. You are right it is over restrictive. Ideally it probably should be something like if (tombstone || debug), and then some logic depending on relocation type and probably section. I am hesitant to change current defaults due to downstream tools (cough lldb), breaking or something.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK I think I understand the history here....
My resources:
https://sourceware.org/pipermail/binutils/2020-May/111357.html
https://lists.llvm.org/pipermail/llvm-dev/2020-May/141885.html
https://reviews.llvm.org/D81784
https://reviews.llvm.org/D84825
TLDR on this whole thing. This code primarily deals with an issue of what to do with symbols in debug sections when text sections they reference get GC/ICF. Those addresses need to be tombsoned. The 0x0 doesn't work in .debug_ranges/.debug_loc because before DWARF5 0x0,0x0 is how those lists ended. The -1 doesn't work because it has special meaning. So first -2 was implemented, but that had issues also, so then it was changed to 1 for loc/ranges and 0 for "everything else".
Except since relocation type allowed (type == target.symbolicRel) is 64 bit "everything else" is .debug_addr section and .debug_info with form DW_FORM_addr. Since in 32 bit dwarf only thing that is 64bit are addresses in .text section. At some point expr == R_DTPREL was added for TLS stuff.
The 32bit relocs were left alone to follow the same path as others and get assigned 0x0 + addend. Seems like mostly for "if it's not broken" reasons.
Although for non-debug non-alloc can have pointer subtraction:
https://lists.llvm.org/pipermail/llvm-dev/2020-May/141918.html
Anyway. IF my understanding is correct I don't see why we can't generalize for all debug sections to
- Tombstone to 1 for ranges/loc
- MAXINT for .debug_names
- 0 for everything else.
This will leave current behavior for cases that really matter, GC/ICF, and for relative relocations (32 bit outside of .debug_names), I don't think it should matter?
Hopefully I didn't miss anything. :)
@MaskRay WDYT?
lld/ELF/InputSection.cpp
Outdated
if (isDebugNames && dyn_cast<Undefined>(&sym)) { | ||
uint64_t maxVal = 0; | ||
switch (type) { | ||
case R_X86_64_64: | ||
maxVal = llvm::maxUIntN(64); | ||
break; | ||
case R_X86_64_32: | ||
maxVal = llvm::maxUIntN(32); | ||
break; | ||
default: | ||
llvm_unreachable("Unsupported relocation type in .debug_names."); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Be nice to hear from @MaskRay or otherwise about whether we can generalize this in some way.
@ayermolo perhaps you can check the commit history to see how the -1 as tombstone
worked - it was committed a few years back, so there might be some hint about how to do that that doesn't require special casing each relocation
@@ -0,0 +1,397 @@ | |||
.text |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I imagine if you check the other test cases (for things like the DebugLoc/DebugRanges cases) you'll find they don't use full DWARF but probably just some relocations in an otherwise empty/small section - and we should probably test this in a similar way. (if the DebugLoc/DebugRanges tests do use fully formed DWARF, that'll be a surprise to me, but then happy to work with you to see if we can simplify the test(s) as much as possible in that form)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah I can look into simplifying.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Simplified, or does it need to be even further that it's not valid .debug_names section, and just has reloc?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd go further, not making it a valid .debug_names section - like the tests in debug-dead-reloc.s (or it could even be added to that - it tests comdat .text already, so it could test comdat .debug_types too) - that tests special cases for .debug_loc and .debug_ranges, for instance, but doesn't have whole valid loclists or range lists.
(but mostly whatever @MaskRay's happy with)
Updated with more generic version. Step in right direction? |
lld/ELF/InputSection.cpp
Outdated
if (!tombstone && isDebug) | ||
tombstone = debugTombstone; | ||
else if (tombstone) | ||
tombstone = SignExtend64<bits>(*tombstone); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if this is sign extended to 64 bits, but then we have a 32 bit relocation, I'd expect this fails/errors out?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is the same as original logic. It would go into if condition if tombstone is specified, and always extend.
bits == 64
So at least on ELF64 this is no-op. So TBH I am not sure why it's there.
template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
static_assert(B > 0, "Bit width can't be 0.");
static_assert(B <= 64, "Bit width out of range.");
return int64_t(x << (64 - B)) >> (64 - B);
}
lld/ELF/InputSection.cpp
Outdated
|
||
// Extend to 64bit MAX for 64 bit relocations, LocalTU, in .debug_names. | ||
if (isDebugNames && type == target.symbolicRel) | ||
tombstone = SignExtend64<32>(*tombstone); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess this only handles one case of 64 bit relocations, but there might be others? & I'm not sure isDebugNames
is the special case here - shouldn't it be known by the relocation, rather than the section the relocation is in?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, made it more generic.
I think it's probably going in the right direction - though I'm not sure we'll be quite able to figure out the right thing without some more lld expertise. But trying to make what progress we can in the interim. |
Anyone else we can rope into this? |
…ry in .debug_names
…lTU entry in .debug_names
…ne LocalTU entry in .debug_names
Sorry for my late reply. IIUC |
The description (the draft commit message when the patch eventually lands) can use some clang commands for the curious to play with. Some information can be extracted from
|
lld/ELF/InputSection.cpp
Outdated
@@ -917,8 +925,7 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { | |||
if (expr == R_NONE) | |||
continue; | |||
|
|||
if (tombstone || | |||
(isDebug && (type == target.symbolicRel || expr == R_DTPREL))) { | |||
if (tombstoneValueToUse) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think
if (!sym.getOutputSection() || (ds && ds->folded && !isDebugLine))
should be the outer if
and the tombstone logic should be the inner if
.
|
…Tombstone LocalTU entry in .debug_names
@MaskRay ping :) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Code-wise this is close, but I want to see #68131 landed so that I can play with .debug_names tombstone values.
My previous comment still applies: the description should describe the choice of UINT32_MAX and UINT64_MAX, so that a curious reader doesn't have to read several external links to understand the choice.
@@ -0,0 +1,26 @@ | |||
.section .debug_info,"G",@progbits,1175092228111723119,comdat |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For newer tests we tend to move away from Inputs/
. dwarf5-debug-names-type-comdat.s and the DWARF64 counterpart can be placed into x86-64-dwarf5-64-debug-names-type-comdat.test
. Search for .ifdef ERR
and llvm-mc --defsym
. We can use some assembly preprocessing to deduplicate the input.
@@ -0,0 +1,16 @@ | |||
// REQUIRES: x86 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In newer tests we make the comment (non-RUN non-CHECK) marker outstanding. We typically use # RUN:
# CHECK:
and ## Regular comments.
@@ -0,0 +1,18 @@ | |||
// REQUIRES: x86 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should place the DWARF64 test into the other file.
Is there a way we could avoid hardcoding this, and just do "max of whatever the relocation size is"? (some earlier versions of this patch had switches over relocation types to try to do this - which seemed a bit verbose/brittle/repetitive - but if there was some way to generalize it tidily, I think that'd be ideal) |
That is already what is happening. We always extend to 64bit MAX, and value is set by relocation type. 32 bit for DWARF32, and 64bit for DWARF64. |
@MaskRay OK, cleaned up tests. |
@MaskRay ping :) |
Sorry for not chiming in earlier. I think the current description is With I think the description can be rephased like the following:
|
const bool isDebugLine = isDebug && name == ".debug_line"; | ||
std::optional<uint64_t> tombstone; | ||
std::optional<uint64_t> tombstone = std::nullopt; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
An optional defaults to std::nullopt. Just default initialize it.
# RUN: ld.lld %t.o %t1.o -o %t1 | ||
# RUN: llvm-objdump -s %t1 | FileCheck %s --check-prefix=CHECK32 | ||
|
||
# Test checks that LLD tombstones TU section that was de-duplicated using COMDAT to the maxium value. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For newer tests we use ##
for non-RUN non-CHECK comments.
# RUN: ld.lld %t.o %t1.o -o %t1 | ||
# RUN: llvm-objdump -s %t1 | FileCheck %s --check-prefix=CHECK64 | ||
|
||
# Test checks that LLD tombstones TU section that was de-duplicated using COMDAT to the maxium value. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ditto
# REQUIRES: x86 | ||
|
||
# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux --defsym DWARF32=1 %s -o %t.o | ||
# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux --defsym DWARF32=1 %s -o %t1.o |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can do ld.lld %t.o %t.o
.
-triple=x86_64
since there is no Linux specific stuff. We omit the OS to essentially say the test is generic to all ELF OSes.
.Lnames_end0: | ||
.endif | ||
|
||
# Test generated with clang++ -g2 -gdwarf-5 -gdwarf64 -gpubnames -fdebug-types-section -S and then manually reduced. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This needs a C++ source example.
@@ -20,8 +20,7 @@ | |||
#include "llvm/Support/Compression.h" | |||
#include "llvm/Support/Endian.h" | |||
#include "llvm/Support/xxhash.h" | |||
#include <algorithm> | |||
#include <mutex> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should keep the headers. mutex is used by std::mutex in this file.
I think I'd implement the logic quite differently. Let me put up a patch... |
I have created #74686 as an alternative. Thank you for your and @dwblaikie's descriptions to improve my .debug_names understanding :) |
By default LLD uses 0x0... as a tombstone value. For Type Units this is a valid
address. Changed so that UINT{32,64}_MAX value is used for tombstone. The
former is for DWARF32, and latter for DWARF64. The default for LLVM is 32 bit DWARF.
Although 64 bit DWARF is supported in LLVM, it's not widely used. The value was chosen
because it is not a valid offset.
This was introduced as a change to DWARF6 spec:
https://dwarfstd.org/issues/231013.1.html