Skip to content

[lldb] Fix and speedup the memory find command #104193

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 35 additions & 34 deletions lldb/source/Target/Process.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,33 +114,6 @@ class ProcessOptionValueProperties
}
};

class ProcessMemoryIterator {
public:
ProcessMemoryIterator(Process &process, lldb::addr_t base)
: m_process(process), m_base_addr(base) {}

bool IsValid() { return m_is_valid; }

uint8_t operator[](lldb::addr_t offset) {
if (!IsValid())
return 0;

uint8_t retval = 0;
Status error;
if (0 == m_process.ReadMemory(m_base_addr + offset, &retval, 1, error)) {
m_is_valid = false;
return 0;
}

return retval;
}

private:
Process &m_process;
const lldb::addr_t m_base_addr;
bool m_is_valid = true;
};

static constexpr OptionEnumValueElement g_follow_fork_mode_values[] = {
{
eFollowParent,
Expand Down Expand Up @@ -3379,21 +3352,49 @@ lldb::addr_t Process::FindInMemory(lldb::addr_t low, lldb::addr_t high,
if (region_size < size)
return LLDB_INVALID_ADDRESS;

// See "Boyer-Moore string search algorithm".
std::vector<size_t> bad_char_heuristic(256, size);
ProcessMemoryIterator iterator(*this, low);

for (size_t idx = 0; idx < size - 1; idx++) {
decltype(bad_char_heuristic)::size_type bcu_idx = buf[idx];
bad_char_heuristic[bcu_idx] = size - idx - 1;
}
for (size_t s = 0; s <= (region_size - size);) {

// Memory we're currently searching through.
llvm::SmallVector<uint8_t, 0> mem;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you are setting N = 0 for the llvm::SmallVector, might as well just use std::vector?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm using the SmallVector for the resize_for_overwrite functionality (allocating memory without initializing it).

// Position of the memory buffer.
addr_t mem_pos = low;
// Maximum number of bytes read (and buffered). We need to read at least
// `size` bytes for a successful match.
const size_t max_read_size = std::max<size_t>(size, 0x10000);

for (addr_t cur_addr = low; cur_addr <= (high - size);) {
if (cur_addr + size > mem_pos + mem.size()) {
// We need to read more data. We don't attempt to reuse the data we've
// already read (up to `size-1` bytes from `cur_addr` to
// `mem_pos+mem.size()`). This is fine for patterns much smaller than
// max_read_size. For very
// long patterns we may need to do something more elaborate.
mem.resize_for_overwrite(max_read_size);
Status error;
mem.resize(ReadMemory(cur_addr, mem.data(),
std::min(mem.size(), high - cur_addr), error));
mem_pos = cur_addr;
if (size > mem.size()) {
// We didn't read enough data. Skip to the next memory region.
MemoryRegionInfo info;
error = GetMemoryRegionInfo(mem_pos + mem.size(), info);
if (error.Fail())
break;
cur_addr = info.GetRange().GetRangeEnd();
continue;
}
}
int64_t j = size - 1;
while (j >= 0 && buf[j] == iterator[s + j])
while (j >= 0 && buf[j] == mem[cur_addr + j - mem_pos])
j--;
if (j < 0)
return low + s;
else
s += bad_char_heuristic[iterator[s + size - 1]];
return cur_addr; // We have a match.
cur_addr += bad_char_heuristic[mem[cur_addr + size - 1 - mem_pos]];
}

return LLDB_INVALID_ADDRESS;
Expand Down
14 changes: 14 additions & 0 deletions lldb/test/API/functionalities/memory/holes/TestMemoryHoles.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ def _prepare_inferior(self):
# inside the holes we've deliberately left empty.
self.memory = self.frame().FindVariable("mem_with_holes").GetValueAsUnsigned()
self.pagesize = self.frame().FindVariable("pagesize").GetValueAsUnsigned()
self.num_pages = (
self.target().FindFirstGlobalVariable("num_pages").GetValueAsUnsigned()
)
positions = self.frame().FindVariable("positions")
self.positions = [
positions.GetChildAtIndex(i).GetValueAsUnsigned()
Expand All @@ -59,3 +62,14 @@ def test_memory_read(self):
self.assertEqual(len(content), self.pagesize)
self.assertEqual(content[0:7], b"needle\0")
self.assertTrue(error.Fail())

@expectedFailureWindows
def test_memory_find(self):
    # Search the whole region [memory, memory + pagesize * num_pages) for
    # the string "needle" and check that every recorded position is
    # reported.
    self._prepare_inferior()

    # Ask for one match more than there are positions, so the command also
    # has to report that it ran out of matches.
    requested_count = len(self.positions) + 1
    range_end = self.memory + self.pagesize * self.num_pages

    expected_substrs = [
        f"data found at location: {p:#x}" for p in self.positions
    ]
    expected_substrs.append("no more matches within the range")

    command = (
        f'memory find --count {requested_count} --string "needle" '
        f"{self.memory:#x} {range_end:#x}"
    )
    self.expect(command, substrs=expected_substrs)
Loading