Skip to content

Commit ecd84ae

Browse files
committed
[lldb] Fix and speed up the memory find command
This patch fixes an issue where the `memory find` command would effectively stop searching after encountering a memory read error (which could happen due to unreadable memory), without giving any indication that it had done so (it would just print that it could not find the pattern). To make matters worse, it would not terminate after encountering this error, but rather proceed to slowly increment the address pointer, which meant that searching a large region could take a very long time (and give the appearance that lldb was actually searching for the pattern).

The patch fixes this first problem (*) by detecting read errors, skipping over the unreadable parts of memory (using GetMemoryRegionInfo), and resuming the search after them. It also reads the memory in bulk (64 KiB at a time — `max_read_size` is `0x10000` in the diff below — or the pattern length if that is larger), which speeds up the search significantly (up to 6x for live processes, 18x for core files).

(*) The fix does not work on Windows yet, because the ReadMemory API there does not return partial results (as it does on other systems). I'm preparing a separate patch to deal with that.
1 parent e5c7cde commit ecd84ae

File tree

2 files changed

+47
-34
lines changed

2 files changed

+47
-34
lines changed

lldb/source/Target/Process.cpp

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -114,33 +114,6 @@ class ProcessOptionValueProperties
114114
}
115115
};
116116

117-
class ProcessMemoryIterator {
118-
public:
119-
ProcessMemoryIterator(Process &process, lldb::addr_t base)
120-
: m_process(process), m_base_addr(base) {}
121-
122-
bool IsValid() { return m_is_valid; }
123-
124-
uint8_t operator[](lldb::addr_t offset) {
125-
if (!IsValid())
126-
return 0;
127-
128-
uint8_t retval = 0;
129-
Status error;
130-
if (0 == m_process.ReadMemory(m_base_addr + offset, &retval, 1, error)) {
131-
m_is_valid = false;
132-
return 0;
133-
}
134-
135-
return retval;
136-
}
137-
138-
private:
139-
Process &m_process;
140-
const lldb::addr_t m_base_addr;
141-
bool m_is_valid = true;
142-
};
143-
144117
static constexpr OptionEnumValueElement g_follow_fork_mode_values[] = {
145118
{
146119
eFollowParent,
@@ -3379,21 +3352,49 @@ lldb::addr_t Process::FindInMemory(lldb::addr_t low, lldb::addr_t high,
33793352
if (region_size < size)
33803353
return LLDB_INVALID_ADDRESS;
33813354

3355+
// See "Boyer-Moore string search algorithm".
33823356
std::vector<size_t> bad_char_heuristic(256, size);
3383-
ProcessMemoryIterator iterator(*this, low);
3384-
33853357
for (size_t idx = 0; idx < size - 1; idx++) {
33863358
decltype(bad_char_heuristic)::size_type bcu_idx = buf[idx];
33873359
bad_char_heuristic[bcu_idx] = size - idx - 1;
33883360
}
3389-
for (size_t s = 0; s <= (region_size - size);) {
3361+
3362+
// Memory we're currently searching through.
3363+
llvm::SmallVector<uint8_t, 0> mem;
3364+
// Position of the memory buffer.
3365+
addr_t mem_pos = low;
3366+
// Maximum number of bytes read (and buffered). We need to read at least
3367+
// `size` bytes for a successful match.
3368+
const size_t max_read_size = std::max<size_t>(size, 0x10000);
3369+
3370+
for (addr_t cur_addr = low; cur_addr <= (high - size);) {
3371+
if (cur_addr + size > mem_pos + mem.size()) {
3372+
// We need to read more data. We don't attempt to reuse the data we've
3373+
// already read (up to `size-1` bytes from `cur_addr` to
3374+
// `mem_pos+mem.size()`). This is fine for patterns much smaller than
3375+
// max_read_size. For very
3376+
// long patterns we may need to do something more elaborate.
3377+
mem.resize_for_overwrite(max_read_size);
3378+
Status error;
3379+
mem.resize(ReadMemory(cur_addr, mem.data(),
3380+
std::min(mem.size(), high - cur_addr), error));
3381+
mem_pos = cur_addr;
3382+
if (size > mem.size()) {
3383+
// We didn't read enough data. Skip to the next memory region.
3384+
MemoryRegionInfo info;
3385+
error = GetMemoryRegionInfo(mem_pos + mem.size(), info);
3386+
if (error.Fail())
3387+
break;
3388+
cur_addr = info.GetRange().GetRangeEnd();
3389+
continue;
3390+
}
3391+
}
33903392
int64_t j = size - 1;
3391-
while (j >= 0 && buf[j] == iterator[s + j])
3393+
while (j >= 0 && buf[j] == mem[cur_addr + j - mem_pos])
33923394
j--;
33933395
if (j < 0)
3394-
return low + s;
3395-
else
3396-
s += bad_char_heuristic[iterator[s + size - 1]];
3396+
return cur_addr; // We have a match.
3397+
cur_addr += bad_char_heuristic[mem[cur_addr + size - 1 - mem_pos]];
33973398
}
33983399

33993400
return LLDB_INVALID_ADDRESS;

lldb/test/API/functionalities/memory/holes/TestMemoryHoles.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def _prepare_inferior(self):
4343
# inside the holes we've deliberately left empty.
4444
self.memory = self.frame().FindVariable("mem_with_holes").GetValueAsUnsigned()
4545
self.pagesize = self.frame().FindVariable("pagesize").GetValueAsUnsigned()
46+
self.num_pages = self.target().FindFirstGlobalVariable("num_pages").GetValueAsUnsigned()
4647
positions = self.frame().FindVariable("positions")
4748
self.positions = [
4849
positions.GetChildAtIndex(i).GetValueAsUnsigned()
@@ -59,3 +60,14 @@ def test_memory_read(self):
5960
self.assertEqual(len(content), self.pagesize)
6061
self.assertEqual(content[0:7], b"needle\0")
6162
self.assertTrue(error.Fail())
63+
64+
@expectedFailureWindows
65+
def test_memory_find(self):
66+
self._prepare_inferior()
67+
68+
matches = [ f"data found at location: {p:#x}" for p in self.positions ]
69+
self.expect(
70+
f'memory find --count {len(self.positions)+1} --string "needle" '
71+
f'{self.memory:#x} {self.memory+self.pagesize*self.num_pages:#x}',
72+
substrs=matches + ["no more matches within the range"],
73+
)

0 commit comments

Comments
 (0)