Skip to content

LLDB Debuginfod tests and a fix or two #90622

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion lldb/packages/Python/lldbsuite/test/make/Makefile.rules
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ LLDB_BASE_DIR := $(THIS_FILE_DIR)/../../../../../
#
# GNUWin32 uname gives "windows32" or "server version windows32" while
# some versions of MSYS uname return "MSYS_NT*", but most environments
# standardize on "Windows_NT", so we'll make it consistent here.
# standardize on "Windows_NT", so we'll make it consistent here.
# When running tests from Visual Studio, the environment variable isn't
# inherited all the way down to the process spawned for make.
#----------------------------------------------------------------------
Expand Down Expand Up @@ -210,6 +210,12 @@ else
ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES"
DSYM = $(EXE).debug
endif

ifeq "$(MAKE_DWP)" "YES"
MAKE_DWO := YES
DWP_NAME = $(EXE).dwp
DYLIB_DWP_NAME = $(DYLIB_NAME).dwp
endif
endif

LIMIT_DEBUG_INFO_FLAGS =
Expand Down Expand Up @@ -357,6 +363,7 @@ ifneq "$(OS)" "Darwin"

OBJCOPY ?= $(call replace_cc_with,objcopy)
ARCHIVER ?= $(call replace_cc_with,ar)
DWP ?= $(call replace_cc_with,dwp)
override AR = $(ARCHIVER)
endif

Expand Down Expand Up @@ -527,6 +534,10 @@ ifneq "$(CXX)" ""
endif
endif

ifeq "$(GEN_GNU_BUILD_ID)" "YES"
LDFLAGS += -Wl,--build-id
endif

#----------------------------------------------------------------------
# DYLIB_ONLY variable can be used to skip the building of a.out.
# See the sections below regarding dSYM file as well as the building of
Expand Down Expand Up @@ -565,10 +576,17 @@ else
endif
else
ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES"
ifeq "$(SAVE_FULL_DEBUG_BINARY)" "YES"
cp "$(EXE)" "$(EXE).unstripped"
endif
$(OBJCOPY) --only-keep-debug "$(EXE)" "$(DSYM)"
$(OBJCOPY) --strip-debug --add-gnu-debuglink="$(DSYM)" "$(EXE)" "$(EXE)"
endif
ifeq "$(MAKE_DWP)" "YES"
$(DWP) -o "$(DWP_NAME)" $(DWOS)
endif
endif


#----------------------------------------------------------------------
# Make the dylib
Expand Down Expand Up @@ -610,9 +628,15 @@ endif
else
$(LD) $(DYLIB_OBJECTS) $(LDFLAGS) -shared -o "$(DYLIB_FILENAME)"
ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES"
ifeq "$(SAVE_FULL_DEBUG_BINARY)" "YES"
cp "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME).unstripped"
endif
$(OBJCOPY) --only-keep-debug "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME).debug"
$(OBJCOPY) --strip-debug --add-gnu-debuglink="$(DYLIB_FILENAME).debug" "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME)"
endif
ifeq "$(MAKE_DWP)" "YES"
$(DWP) -o $(DYLIB_DWP_FILE) $(DYLIB_DWOS)
endif
endif

#----------------------------------------------------------------------
Expand Down
38 changes: 25 additions & 13 deletions lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4378,26 +4378,38 @@ const std::shared_ptr<SymbolFileDWARFDwo> &SymbolFileDWARF::GetDwpSymbolFile() {
FileSpecList search_paths = Target::GetDefaultDebugFileSearchPaths();
ModuleSpec module_spec;
module_spec.GetFileSpec() = m_objfile_sp->GetFileSpec();
FileSpec dwp_filespec;
for (const auto &symfile : symfiles.files()) {
module_spec.GetSymbolFileSpec() =
FileSpec(symfile.GetPath() + ".dwp", symfile.GetPathStyle());
LLDB_LOG(log, "Searching for DWP using: \"{0}\"",
module_spec.GetSymbolFileSpec());
FileSpec dwp_filespec =
dwp_filespec =
PluginManager::LocateExecutableSymbolFile(module_spec, search_paths);
if (FileSystem::Instance().Exists(dwp_filespec)) {
LLDB_LOG(log, "Found DWP file: \"{0}\"", dwp_filespec);
DataBufferSP dwp_file_data_sp;
lldb::offset_t dwp_file_data_offset = 0;
ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin(
GetObjectFile()->GetModule(), &dwp_filespec, 0,
FileSystem::Instance().GetByteSize(dwp_filespec), dwp_file_data_sp,
dwp_file_data_offset);
if (dwp_obj_file) {
m_dwp_symfile = std::make_shared<SymbolFileDWARFDwo>(
*this, dwp_obj_file, DIERef::k_file_index_mask);
break;
}
break;
}
}
if (!FileSystem::Instance().Exists(dwp_filespec)) {
LLDB_LOG(log, "No DWP file found locally");
// Fill in the UUID for the module we're trying to match for, so we can
// find the correct DWP file, as the Debuginfod plugin uses *only* this
// data to correctly match the DWP file with the binary.
module_spec.GetUUID() = m_objfile_sp->GetUUID();
dwp_filespec =
PluginManager::LocateExecutableSymbolFile(module_spec, search_paths);
}
if (FileSystem::Instance().Exists(dwp_filespec)) {
LLDB_LOG(log, "Found DWP file: \"{0}\"", dwp_filespec);
DataBufferSP dwp_file_data_sp;
lldb::offset_t dwp_file_data_offset = 0;
ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin(
GetObjectFile()->GetModule(), &dwp_filespec, 0,
FileSystem::Instance().GetByteSize(dwp_filespec), dwp_file_data_sp,
dwp_file_data_offset);
if (dwp_obj_file) {
m_dwp_symfile = std::make_shared<SymbolFileDWARFDwo>(
*this, dwp_obj_file, DIERef::k_file_index_mask);
}
}
if (!m_dwp_symfile) {
Expand Down
7 changes: 6 additions & 1 deletion lldb/source/Plugins/SymbolLocator/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Order matters here: the first symbol locator prevents further searching.
# For DWARF binaries that are both stripped and split, the Default plugin
# will return the stripped binary when asked for the ObjectFile, which then
# prevents an unstripped binary from being requested from the Debuginfod
# provider.
add_subdirectory(Debuginfod)
add_subdirectory(Default)
if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
add_subdirectory(DebugSymbols)
endif()
add_subdirectory(Debuginfod)
29 changes: 27 additions & 2 deletions lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,24 @@ llvm::StringRef SymbolVendorELF::GetPluginDescriptionStatic() {
"executables.";
}

// If this is needed elsewhere, it can be exported/moved.
//
// Returns true when `file_spec` can be opened as an ELF object file that
// contains a debug_cu_index section; false otherwise (including when no
// object file plugin accepts the file or no section list is available).
static bool IsDwpSymbolFile(const lldb::ModuleSP &module_sp,
                            const FileSpec &file_spec) {
  DataBufferSP dwp_file_data_sp;
  lldb::offset_t dwp_file_data_offset = 0;
  // Try to create an ObjectFile from the file_spec.
  ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin(
      module_sp, &file_spec, 0, FileSystem::Instance().GetByteSize(file_spec),
      dwp_file_data_sp, dwp_file_data_offset);
  if (!dwp_obj_file || !ObjectFileELF::classof(dwp_obj_file.get()))
    return false;
  // Make sure we don't fill in the section list on dwp_obj_file (by calling
  // GetSectionList(false)) as this function could be called before we may
  // have all the symbol files collected and available.
  auto *section_list = dwp_obj_file->GetSectionList(false);
  // Guard against a missing section list instead of dereferencing it blindly.
  if (!section_list)
    return false;
  // The presence of a debug_cu_index section is the key identifying feature of
  // a DWP file.
  return section_list->FindSectionByType(eSectionTypeDWARFDebugCuIndex,
                                         false) != nullptr;
}

// CreateInstance
//
// Platforms can register a callback to use when creating symbol vendors to
Expand Down Expand Up @@ -87,8 +105,15 @@ SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp,
FileSpecList search_paths = Target::GetDefaultDebugFileSearchPaths();
FileSpec dsym_fspec =
PluginManager::LocateExecutableSymbolFile(module_spec, search_paths);
if (!dsym_fspec)
return nullptr;
if (!dsym_fspec || IsDwpSymbolFile(module_sp, dsym_fspec)) {
// If we have a stripped binary or if we got a DWP file, we should prefer
// symbols in the executable acquired through a plugin.
ModuleSpec unstripped_spec =
PluginManager::LocateExecutableObjectFile(module_spec);
if (!unstripped_spec)
return nullptr;
dsym_fspec = unstripped_spec.GetFileSpec();
}
Copy link
Collaborator

@DavidSpickett DavidSpickett May 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something about this if block has broken Expr/TestStringLiteralExpr.test on Arm 32 bit, a platform that has been sensitive to changes in this area in the past. The test is built without debug info and something happens when the test file comes through here. For ld.so and the libc it's all the same as before.

(lldb) expr "hello there"
lldb             ProcessGDBRemote::DoAllocateMemory no direct stub support for memory allocation, and InferiorCallMmap also failed - is stub missing register context save/restore capability?

Last time this happened it was because we lost the symbols that let us call mmap, but those are in ld.so usually so I'm not sure what's the problem this time.

This means we fall back to the interpreter, which we didn't need to do before.

error: Can't evaluate the expression without a running target due to: Interpreter doesn't handle one of the expression's operands

I've reverted this PR (327bfc9) and the follow up while I investigate locally.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We try to allocate memory to JIT the expression and calling mmap fails with a signal. In #68987 this was because we had lost the section info that told us whether to call it as Thumb or Arm, if we get that wrong it causes a SIGILL, so it could be the same thing again.

I will look into it more next week and assuming I find a fix, reland the changes for you.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@DavidSpickett if there's anything I can do to help, please ping me. Feel free to use my github handle at hotmail to ping me, as I'm going to be on a "doing nothing in nicer weather" vacation next week.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is what I've figured out so far. It's like the previous issue I mentioned, except it's related to the program file, not ld.so.

If we look at the memory regions before this PR we see:

[0x00000000f7fe2000-0x00000000f7fe9000) ---
[0x00000000f7fe9000-0x00000000f7fea000) rw-
[0x00000000f7fea000-0x00000000f7feb000) r-- objc_imageinfo
[0x00000000f7feb000-0x00000000f7fec000) r-x .text

After we just have:

[0x00000000f7fe2000-0x00000000f7fec000) ---

Which means we have lost, or never loaded, the section information for this program file. Which makes sense given that it's compiled without debug information. We'll ask a plugin for the debug info and none of those will have it, so we exit early.

Problem is, Arm has 2 execution modes. Arm and Thumb. To know how to break properly in different code we look at markers on the sections. These set the AddressClass of the address value in LLDB to the right value.

In this case, we need to call mmap. This is in a Thumb section in ld.so, and we have its section information. We don't need to break here though, only set the control register (cpsr) bit to indicate we want to run in Thumb mode, then write the PC to point to this location.

For the end of mmap, we need to return somewhere. So lldb says, why not _start, it's something we can count on existing on Linux. So LLDB needs to set the link register to that address, then place a breakpoint on the address to catch the program just as mmap finishes.

Problem is, after this PR we don't have section information to tell us that this area is Thumb. This means that Platform::GetSoftwareBreakpointTrapOpcode chooses an Arm breakpoint code. Now I don't know exactly what goes wrong there, I think in Thumb mode the program sees this encoding as some kind of backwards branch, and it ends up basically in the zero page and segfaults from there. This is why the call to mmap fails and we fall back to the interpreter.

So ideally we want to split the two concepts of 1. Section information and 2. Symbol file. So that we can load both without resetting the other. I'm going to look into that next.

And for some context, I think Arm is the only platform where this section information is crucial. Potentially MIPS but also we dropped Linux MIPS support, and I don't know if those platforms mixed modes ever. So it's not surprising you didn't see this testing on more modern platforms.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#91585 fixes the underlying issue, if/when that's proven to work, I'll put this PR back in.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That PR is in, so at least Arm Linux is happy for this to go back in.


DataBufferSP dsym_file_data_sp;
lldb::offset_t dsym_file_data_offset = 0;
Expand Down
19 changes: 19 additions & 0 deletions lldb/test/API/debuginfod/Normal/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
C_SOURCES := main.c

# For normal (non DWP) Debuginfod tests, we need:

# * The full binary: a.out.unstripped
# Produced by Makefile.rules with SAVE_FULL_DEBUG_BINARY set to YES and
# SPLIT_DEBUG_SYMBOLS set to YES

# * The stripped binary (a.out)
# Produced by Makefile.rules with SPLIT_DEBUG_SYMBOLS set to YES

# * The 'only-keep-debug' binary (a.out.debug)
# Produced below

SPLIT_DEBUG_SYMBOLS := YES
SAVE_FULL_DEBUG_BINARY := YES
GEN_GNU_BUILD_ID := YES

include Makefile.rules
183 changes: 183 additions & 0 deletions lldb/test/API/debuginfod/Normal/TestDebuginfod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
import os
import shutil
import tempfile

import lldb
from lldbsuite.test.decorators import *
import lldbsuite.test.lldbutil as lldbutil
from lldbsuite.test.lldbtest import *


"""
Test support for the DebugInfoD network symbol acquisition protocol.
This one is for simple / no split-dwarf scenarios.

For no-split-dwarf scenarios, there are 2 variations:
1 - A stripped binary with its corresponding unstripped binary:
2 - A stripped binary with a corresponding --only-keep-debug symbols file
"""


class DebugInfodTests(TestBase):
# No need to try every flavor of debug info.
NO_DEBUG_INFO_TESTCASE = True

def setUp(self):
    """Run the base-class setup, then skip when Debuginfod is unavailable."""
    TestBase.setUp(self)
    debuginfod_enabled = "Debuginfod" in configuration.enabled_plugins
    if not debuginfod_enabled:
        # Nothing below can work without the Debuginfod plugin.
        self.skipTest("The Debuginfod SymbolLocator plugin is not enabled")

def test_normal_no_symbols(self):
    """
    Baseline negative case: only a.out is copied and no Debuginfod
    content is provided, so the breakpoint must not get a line entry.
    """
    self.config_test(["a.out"])
    self.try_breakpoint(should_have_loc=False)

def test_normal_default(self):
    """
    Baseline positive case: a.out and a.out.debug are both copied next to
    each other and no Debuginfod content is provided; the breakpoint must
    get a line entry.
    """
    self.config_test(["a.out", "a.out.debug"])
    self.try_breakpoint(should_have_loc=True)

def test_debuginfod_symbols(self):
    """
    Serve the full (unstripped) binary as the 'debuginfo' entry of the
    mocked Debuginfod server and expect the breakpoint to get a line entry.
    """
    self.config_test(["a.out"], debuginfo="a.out.unstripped")
    self.try_breakpoint(should_have_loc=True)

def test_debuginfod_executable(self):
    """
    Serve the full (unstripped) binary as the 'executable' entry of the
    mocked Debuginfod server and expect the breakpoint to get a line entry.
    """
    self.config_test(["a.out"], debuginfo=None, executable="a.out.unstripped")
    self.try_breakpoint(should_have_loc=True)

def test_debuginfod_okd_symbols(self):
    """
    Serve the 'only-keep-debug' file (a.out.debug) as the 'debuginfo' entry
    of the mocked Debuginfod server and expect a line entry.
    """
    self.config_test(["a.out"], debuginfo="a.out.debug")
    self.try_breakpoint(should_have_loc=True)

def try_breakpoint(self, should_have_loc):
    """
    This function creates a target from self.aout, sets a function-name
    breakpoint, and checks to see if we have a file/line location,
    as a way to validate that the symbols have been loaded.
    should_have_loc specifies if we're testing that symbols have or
    haven't been loaded.

    The target and self.tmp_dir are released in a 'finally' clause so that a
    failing assertion does not leave them behind.
    """
    target = None
    try:
        target = self.dbg.CreateTarget(self.aout)
        self.assertTrue(target and target.IsValid(), "Target is valid")

        bp = target.BreakpointCreateByName("func")
        self.assertTrue(bp and bp.IsValid(), "Breakpoint is valid")
        self.assertEqual(bp.GetNumLocations(), 1)

        loc = bp.GetLocationAtIndex(0)
        self.assertTrue(loc and loc.IsValid(), "Location is valid")
        addr = loc.GetAddress()
        self.assertTrue(addr and addr.IsValid(), "Loc address is valid")
        line_entry = addr.GetLineEntry()
        self.assertEqual(
            should_have_loc,
            line_entry is not None and line_entry.IsValid(),
            "Loc line entry is valid",
        )
        if should_have_loc:
            self.assertEqual(line_entry.GetLine(), 4)
            self.assertEqual(
                line_entry.GetFileSpec().GetFilename(),
                self.main_source_file.GetFilename(),
            )
    finally:
        # Clean up even when an assertion above failed.
        if target is not None:
            self.dbg.DeleteTarget(target)
        shutil.rmtree(self.tmp_dir)

def config_test(self, local_files, debuginfo=None, executable=None):
    """
    Set up a test with local_files[] copied to a different location
    so that we control which files are, or are not, found in the file system.
    Also, create a stand-alone file-system 'hosted' debuginfod server with the
    provided debuginfo and executable files (if they exist)

    Make the filesystem look like:

    /tmp/<tmpdir>/test/[local_files]

    /tmp/<tmpdir>/cache (for lldb to use as a temp cache)

    /tmp/<tmpdir>/buildid/<uuid>/executable -> <executable>
    /tmp/<tmpdir>/buildid/<uuid>/debuginfo -> <debuginfo>
    Returns the /tmp/<tmpdir> path
    """

    self.build()

    uuid = self.getUUID("a.out")
    if not uuid:
        # fail() raises, so nothing after this line runs in that case.
        self.fail("Could not get UUID for a.out")
    self.main_source_file = lldb.SBFileSpec("main.c")
    self.tmp_dir = tempfile.mkdtemp()
    test_dir = os.path.join(self.tmp_dir, "test")
    os.makedirs(test_dir)

    # Copy the files used by the test:
    for f in local_files:
        shutil.copy(self.getBuildArtifact(f), test_dir)
    # The first item is the binary to be used for the test
    self.aout = os.path.join(test_dir, local_files[0]) if local_files else ""

    use_debuginfod = debuginfo is not None or executable is not None

    # Populate the 'file://... mocked' Debuginfod server:
    if use_debuginfod:
        os.makedirs(os.path.join(self.tmp_dir, "cache"))
        uuid_dir = os.path.join(self.tmp_dir, "buildid", uuid)
        os.makedirs(uuid_dir)
        if debuginfo:
            shutil.copy(
                self.getBuildArtifact(debuginfo),
                os.path.join(uuid_dir, "debuginfo"),
            )
        if executable:
            shutil.copy(
                self.getBuildArtifact(executable),
                os.path.join(uuid_dir, "executable"),
            )

    # Configure LLDB for the test:
    self.runCmd(
        "settings set symbols.enable-external-lookup %s"
        % str(use_debuginfod).lower()
    )
    self.runCmd("settings clear plugin.symbol-locator.debuginfod.server-urls")
    if use_debuginfod:
        self.runCmd(
            "settings set plugin.symbol-locator.debuginfod.cache-path %s/cache"
            % self.tmp_dir
        )
        self.runCmd(
            "settings insert-before plugin.symbol-locator.debuginfod.server-urls 0 file://%s"
            % self.tmp_dir
        )

    # Hand back the root directory, as the docstring promises.
    return self.tmp_dir

def getUUID(self, filename):
try:
target = self.dbg.CreateTarget(self.getBuildArtifact(filename))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't actually need to create a target for this. Something like:

spec = lldb.SBModuleSpec()
spec.SetFileSpec(self.getBuildArtifact(filename))
lldb.SBModule(spec).GetUUIDString()

ought to suffice.

Copy link
Collaborator

@labath labath May 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this check is being too strict (and could be the cause of the reported failures). There's no requirement that the build id be exactly 20 bytes (40 hex characters) long, and in fact different linkers will use different build id lengths (corresponding to different checksum algorithms).

If all you wanted was to rule out lldb's fake crc-based UUIDs, then I suggest something like len(uuid) > 8.
If you absolutely must have a 20 byte uuid, you need to pass --build-id=sha1 to the linker.

module = target.GetModuleAtIndex(0)
uuid = module.GetUUIDString().replace("-", "").lower()
self.dbg.DeleteTarget(target)
return uuid if len(uuid) == 40 else None
except:
return None
7 changes: 7 additions & 0 deletions lldb/test/API/debuginfod/Normal/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// This is a dumb little pair of test files

int func(int argc, const char *argv[]) { // Breakpoint target: the test sets a by-name breakpoint on "func".
return (argc + 1) * (argv[argc][0] + 2); // The test asserts the breakpoint resolves to line 4; keep this statement on that line.
}

int main(int argc, const char *argv[]) { return func(0, argv); } // Always passes 0, so func reads argv[0][0].
Loading
Loading