-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[LLDB][ProcessELFCore] Add Description to ProcessELFCore/ELFThread stop reasons #110065
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-lldb Author: Jacob Lalonde (Jlalond) ChangesThis fixes a functionality gap with GDB, where GDB will properly decode the stop reason and give the address for SIGSEGV. I also added descriptions to all stop reasons, following the same code path that the Native Linux Thread uses. Full diff: https://github.com/llvm/llvm-project/pull/110065.diff 3 Files Affected:
diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
index 7955594bf5d94c..468a3b8934e741 100644
--- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
+++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
@@ -931,6 +931,7 @@ llvm::Error ProcessElfCore::parseLinuxNotes(llvm::ArrayRef<CoreNote> notes) {
return status.ToError();
thread_data.signo = siginfo.si_signo;
thread_data.code = siginfo.si_code;
+ thread_data.description = siginfo.GetDescription();
break;
}
case ELF::NT_FILE: {
diff --git a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp
index 52b96052bdbeca..e9ae5211c28a53 100644
--- a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp
+++ b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp
@@ -9,6 +9,7 @@
#include "lldb/Target/RegisterContext.h"
#include "lldb/Target/StopInfo.h"
#include "lldb/Target/Target.h"
+#include "lldb/Target/UnixSignals.h"
#include "lldb/Target/Unwind.h"
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/LLDBLog.h"
@@ -49,8 +50,8 @@ using namespace lldb_private;
// Construct a Thread object with given data
ThreadElfCore::ThreadElfCore(Process &process, const ThreadData &td)
: Thread(process, td.tid), m_thread_name(td.name), m_thread_reg_ctx_sp(),
- m_signo(td.signo), m_code(td.code), m_gpregset_data(td.gpregset),
- m_notes(td.notes) {}
+ m_signo(td.signo), m_code(td.code), m_sig_description(td.description),
+ m_gpregset_data(td.gpregset), m_notes(td.notes) {}
ThreadElfCore::~ThreadElfCore() { DestroyThread(); }
@@ -241,7 +242,7 @@ bool ThreadElfCore::CalculateStopInfo() {
return false;
SetStopInfo(StopInfo::CreateStopReasonWithSignal(
- *this, m_signo, /*description=*/nullptr, m_code));
+ *this, m_signo, m_sig_description.c_str(), m_code));
return true;
}
@@ -543,7 +544,8 @@ size_t ELFLinuxSigInfo::GetSize(const lldb_private::ArchSpec &arch) {
Status ELFLinuxSigInfo::Parse(const DataExtractor &data, const ArchSpec &arch) {
Status error;
- if (GetSize(arch) > data.GetByteSize()) {
+ uint64_t size = GetSize(arch);
+ if (size > data.GetByteSize()) {
error = Status::FromErrorStringWithFormat(
"NT_SIGINFO size should be %zu, but the remaining bytes are: %" PRIu64,
GetSize(arch), data.GetByteSize());
@@ -556,6 +558,34 @@ Status ELFLinuxSigInfo::Parse(const DataExtractor &data, const ArchSpec &arch) {
si_signo = data.GetU32(&offset);
si_errno = data.GetU32(&offset);
si_code = data.GetU32(&offset);
+ // 64b ELF have a 4 byte pad.
+ if (data.GetAddressByteSize() == 8)
+ offset += 4;
+ switch (si_signo) {
+ case SIGFPE:
+ case SIGILL:
+ case SIGSEGV:
+ case SIGBUS:
+ case SIGTRAP:
+ addr = (void *)data.GetAddress(&offset);
+ addr_lsb = data.GetU16(&offset);
+ return error;
+ default:
+ return error;
+ }
+}
- return error;
+std::string ELFLinuxSigInfo::GetDescription() {
+ switch (si_signo) {
+ case SIGFPE:
+ case SIGILL:
+ case SIGSEGV:
+ case SIGBUS:
+ case SIGTRAP:
+ return lldb_private::UnixSignals::CreateForHost()->GetSignalDescription(
+ si_signo, si_code, reinterpret_cast<uintptr_t>(addr));
+ default:
+ return lldb_private::UnixSignals::CreateForHost()->GetSignalDescription(
+ si_signo, si_code);
+ }
}
diff --git a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h
index 3fa0b8b0eedb0b..3c6c02f73efae8 100644
--- a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h
+++ b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h
@@ -75,16 +75,25 @@ struct ELFLinuxPrStatus {
static_assert(sizeof(ELFLinuxPrStatus) == 112,
"sizeof ELFLinuxPrStatus is not correct!");
+union ELFSigval {
+ int sival_int;
+ void *sival_ptr;
+};
+
struct ELFLinuxSigInfo {
- int32_t si_signo;
- int32_t si_code;
+ int32_t si_signo; // Order matters for the first 3.
int32_t si_errno;
+ int32_t si_code;
+ void *addr; /* faulting insn/memory ref. */
+ int32_t addr_lsb; /* Valid LSB of the reported address. */
ELFLinuxSigInfo();
lldb_private::Status Parse(const lldb_private::DataExtractor &data,
const lldb_private::ArchSpec &arch);
+ std::string GetDescription();
+
// Return the bytesize of the structure
// 64 bit - just sizeof
// 32 bit - hardcoded because we are reusing the struct, but some of the
@@ -93,7 +102,7 @@ struct ELFLinuxSigInfo {
static size_t GetSize(const lldb_private::ArchSpec &arch);
};
-static_assert(sizeof(ELFLinuxSigInfo) == 12,
+static_assert(sizeof(ELFLinuxSigInfo) == 32,
"sizeof ELFLinuxSigInfo is not correct!");
// PRPSINFO structure's size differs based on architecture.
@@ -144,7 +153,9 @@ struct ThreadData {
lldb::tid_t tid;
int signo = 0;
int code = 0;
+ void *sigaddr = nullptr;
int prstatus_sig = 0;
+ std::string description;
std::string name;
};
@@ -183,6 +194,7 @@ class ThreadElfCore : public lldb_private::Thread {
int m_signo;
int m_code;
+ std::string m_sig_description;
lldb_private::DataExtractor m_gpregset_data;
std::vector<lldb_private::CoreNote> m_notes;
|
Looks like I got some failures due to the tests expecting an exact string match. Will fix |
So does this not need new testing or did you not add that yet? I'm not clear on what fully decode means because at least for certain signals like a tag check fault we do have a description for live processes and core files. Perhaps that's because I added those as special cases I don't remember the specifics. |
Seems reasonable, but you'll need to use the signal constants from |
I only added part of the description - 85bc498. Didn't add the fault address. Happy to see improvements here! |
I actually tried to yamlize one of my test crash dumps. The issue I encountered was the PT_NOTE sections wasn't being generated with content. Is there a way with obj2yaml to generate PT_NOTES with content? |
You're referring to the properties in the ELFSignal object? |
int32_t si_errno; | ||
int32_t si_code; | ||
void *addr; /* faulting insn/memory ref. */ | ||
int32_t addr_lsb; /* Valid LSB of the reported address. */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we name this one as similar to the exact siginfo structure?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can't use the si_
prefixed variables because siginfo_t.h
defines these all as Macros in the global namespace, and I didn't want to couple our build to the existence of that macro
I'm referring to all of the places where you have |
yaml2obj was originally written for llvm testing, so it mainly focuses on sections and other things that are interesting there. Support for program headers came later, and it's done on an on-demand and self-serve basis. Also, obj2yaml is generally in a better state than yaml2obj, so you may be able to use it to create the file you need, but you may need to write some yaml by hand. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
✅ With the latest revision this PR passed the Python code formatter. |
I'm going to ignore my obj2yaml case here as we have some valid core tests that I updated. But I may tag in the future for obj2yaml to add content section to sufficiently small PT_Notes in the future! |
static bool IsSignalWithAddrValue(int signo) { | ||
// SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP | ||
// We can't use the enum here because it may not be available on windows or | ||
// other platforms. We should make an LLDB platform agnostic enum for this |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We already have that. This data is encoded in the process->GetUnixSignals()
object. You just need to plumb it to the right place.
|
||
std::string ELFLinuxSigInfo::GetDescription() { | ||
if (IsSignalWithAddrValue(si_signo)) | ||
return lldb_private::UnixSignals::CreateForHost()->GetSignalDescription( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And this shouldn't be using the Host UnixSignals object for the same reason. Get the one from the Process
instance.
8d0f382
to
e98ac53
Compare
int32_t si_errno; | ||
int32_t si_code; | ||
lldb::addr_t addr; /* faulting insn/memory ref. */ | ||
int32_t addr_lsb; /* Valid LSB of the reported address. */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove this if we aren't using it?
int32_t si_errno; | ||
int32_t si_code; | ||
lldb::addr_t addr; /* faulting insn/memory ref. */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
name si_addr
just like in the siginfo_t
structure since the other names are using this convention. You might want to mirror more what the definition is by adding a anonymous union and then add the _sigfault
entry to match the real definition.
I compiled in linux and this is what it looks like:
(siginfo_t) siginfo = {
(int) si_signo = 0
(int) si_errno = 0
(int) si_code = 0
(int) __pad0 = 0
(siginfo_t::(unnamed union)) _sifields = {
(int[28]) _pad = {
(int) [0] = 0
(int) [1] = 0
(int) [2] = 0
(int) [3] = 0
(int) [4] = 0
(int) [5] = 0
(int) [6] = 0
(int) [7] = 0
(int) [8] = 0
(int) [9] = 0
(int) [10] = 0
(int) [11] = 0
(int) [12] = 0
(int) [13] = 0
(int) [14] = 0
(int) [15] = 0
(int) [16] = 0
(int) [17] = 0
(int) [18] = 0
(int) [19] = 0
(int) [20] = 0
(int) [21] = 0
(int) [22] = 0
(int) [23] = 0
(int) [24] = 0
(int) [25] = 0
(int) [26] = 0
(int) [27] = 0
}
(siginfo_t::(unnamed struct)) _kill = {
(__pid_t) si_pid = 0
(__uid_t) si_uid = 0
}
(siginfo_t::(unnamed struct)) _timer = {
(int) si_tid = 0
(int) si_overrun = 0
(__sigval_t) si_sigval = {
(int) sival_int = 0
(void *) sival_ptr = 0x0000000000000000
}
}
(siginfo_t::(unnamed struct)) _rt = {
(__pid_t) si_pid = 0
(__uid_t) si_uid = 0
(__sigval_t) si_sigval = {
(int) sival_int = 0
(void *) sival_ptr = 0x0000000000000000
}
}
(siginfo_t::(unnamed struct)) _sigchld = {
(__pid_t) si_pid = 0
(__uid_t) si_uid = 0
(int) si_status = 0
(__clock_t) si_utime = 0
(__clock_t) si_stime = 0
}
(siginfo_t::(unnamed struct)) _sigfault = {
(void *) si_addr = 0x0000000000000000
(short) si_addr_lsb = 0
(siginfo_t::(unnamed union)) _bounds = {
(siginfo_t::(unnamed struct)) _addr_bnd = {
(void *) _lower = 0x0000000000000000
(void *) _upper = 0x0000000000000000
}
(__uint32_t) _pkey = 0
}
}
(siginfo_t::(unnamed struct)) _sigpoll = {
(long) si_band = 0
(int) si_fd = 0
}
(siginfo_t::(unnamed struct)) _sigsys = {
(void *) _call_addr = 0x0000000000000000
(int) _syscall = 0
(unsigned int) _arch = 0
}
}
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to have to disagree with Greg here. The internal layout of siginfo_t
differs even for different linux architectures, so I don't think that matching any one particular system is doing to be helpful. Matching the exact layout also still wouldn't let us write things like our_siginfo.si_addr
as that depends on the host macro, which will not be defined everywhere. (And it also would require using reserved identifiers)
The public interface (si_addr
), is the same everywhere, so matching that would be better. Now, we can't use that name exactly, as it is a macro, but I we can use something that's closer to it. I'd suggest si_addr_
, with a small comment that the other name is a macro.
Another thing I'd add here is that we now have (as in, we did not have it when this class was originally written), a way to get the siginfo_t definition precisely matching the os/architecture of the core file -- via Platform::GetSiginfoType()
. I wouldn't say it's a requirement for this patch, but if you find yourself needing to match the exact layout of a siginfo struct, that's definitely the way I'd recommend. Since this is a CompilerType
, it will take some refactoring to make it work with the code here, but the nice thing about it is that, once you have everything wired up, you will be able to dump the signal information for a thread with the thread siginfo
command -- even if you don't have any debug information for the core file.
// the unix_signals_sp->GetSignalDescription() call below. | ||
if (unix_signals_sp->GetShouldStop(si_signo)) { | ||
addr = data.GetAddress(&offset); | ||
addr_lsb = data.GetU16(&offset); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove addr_lsb
if we don't use it.
offset += 4; | ||
// Not every stop signal has a valid address, but that will get resolved in | ||
// the unix_signals_sp->GetSignalDescription() call below. | ||
if (unix_signals_sp->GetShouldStop(si_signo)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GetShouldStop
is called when determining whether we should automatically resume an application when it gets interrupted by a signal. Using it in a core file is.. unusual, as there is nothing to resume. What exactly are you trying to achieve here?
int32_t si_errno; | ||
int32_t si_code; | ||
lldb::addr_t addr; /* faulting insn/memory ref. */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to have to disagree with Greg here. The internal layout of siginfo_t
differs even for different linux architectures, so I don't think that matching any one particular system is doing to be helpful. Matching the exact layout also still wouldn't let us write things like our_siginfo.si_addr
as that depends on the host macro, which will not be defined everywhere. (And it also would require using reserved identifiers)
The public interface (si_addr
), is the same everywhere, so matching that would be better. Now, we can't use that name exactly, as it is a macro, but I we can use something that's closer to it. I'd suggest si_addr_
, with a small comment that the other name is a macro.
Another thing I'd add here is that we now have (as in, we did not have it when this class was originally written), a way to get the siginfo_t definition precisely matching the os/architecture of the core file -- via Platform::GetSiginfoType()
. I wouldn't say it's a requirement for this patch, but if you find yourself needing to match the exact layout of a siginfo struct, that's definitely the way I'd recommend. Since this is a CompilerType
, it will take some refactoring to make it work with the code here, but the nice thing about it is that, once you have everything wired up, you will be able to dump the signal information for a thread with the thread siginfo
command -- even if you don't have any debug information for the core file.
const lldb::UnixSignalsSP unix_signals_sp); | ||
|
||
std::string GetDescription(const lldb::UnixSignalsSP unix_signals_sp); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use const UnixSignals&
. const lldb::UnixSignalsSP
is a pointer to a mutable object, and you're not mutating it, nor storing long-term references to it.
397f094
to
acbc3a0
Compare
// Copied from siginfo_t so we don't have to include signal.h on non 'Nix | ||
// builds, we add `g` to the si_ prefix because siginfo_t defines them as | ||
// macros. | ||
struct { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should still make an anonymous union here for if/when we add more entries that overlap like in siginfo_t
. Since this isn't C code, we should also put the _sigfault
at the top:
union {
struct _sigfault {
/* used when si_code=SEGV_PKUERR */ | ||
uint32_t _pkey; | ||
} _bounds; | ||
} _sigfault; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove name from the end of the struct sincde we are using C++.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't the name at the end of the struct denote it's also a member? I moved these to the top and then I then had to separately declare a member of my new struct type.
} _addr_bnd; | ||
/* used when si_code=SEGV_PKUERR */ | ||
uint32_t _pkey; | ||
} _bounds; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
move to top of union:
union _bounds {
struct { | ||
lldb::addr_t _lower; | ||
lldb::addr_t _upper; | ||
} _addr_bnd; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
move to struct line:
struct _addr_bnd;
int signo = 0; | ||
int code = 0; | ||
int prstatus_sig = 0; | ||
std::string description; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We might be able to remove all of these and just replace these with a lldb::StopInfoSP
and always either create a lldb::StopInfoSP
for the thread when we create the thread instead of duplicating info from the siginfo_t
struct here, we don't want to keep adding more fields here in the future.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to lightly push back on this. ProcessElfCore currently depends a lot on threaddata, not just for populating the thread but for some ELF specific logic.
I think what we could do is forego having ELFSigInfo populate itself and instead directly populate threaddata, which prevents us from double allocating, then we can have ThreadElfCore
continue creating a StopInfoSP
in CalculateStopInfo
.
int m_signo; | ||
int m_code; | ||
std::string m_sig_description; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See lldb::StopInfoSP
comment above. We seem to be duplicating info from the siginto_t
struct in both the ThreadData
and in the ThreadELFCore
classes.
… and rework parsing and description generating code
…e read 0 from NT_SIGINFO
65c3d18
to
a4b7b3f
Compare
We're also seeing this issue. |
Ack, sorry about the delay. I will get this fixed today. |
This fixes a functionality gap with GDB, where GDB will properly decode the stop reason and give the address for SIGSEGV. I also added descriptions to all stop reasons, following the same code path that the Native Linux Thread uses.