-
Notifications
You must be signed in to change notification settings - Fork 13.7k
[win/asan] Search both higher and lower in AllocateTrampolineRegion #114212
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -187,8 +187,12 @@ static uptr GetMmapGranularity() { | |
return si.dwAllocationGranularity; | ||
} | ||
|
||
UNUSED static uptr RoundDownTo(uptr size, uptr boundary) { | ||
return size & ~(boundary - 1); | ||
} | ||
|
||
UNUSED static uptr RoundUpTo(uptr size, uptr boundary) { | ||
return (size + boundary - 1) & ~(boundary - 1); | ||
return RoundDownTo(size + boundary - 1, boundary); | ||
} | ||
|
||
// FIXME: internal_str* and internal_mem* functions should be moved from the | ||
|
@@ -285,8 +289,11 @@ static void WriteJumpInstruction(uptr from, uptr target) { | |
|
||
static void WriteShortJumpInstruction(uptr from, uptr target) { | ||
sptr offset = target - from - kShortJumpInstructionLength; | ||
if (offset < -128 || offset > 127) | ||
if (offset < -128 || offset > 127) { | ||
ReportError("interception_win: cannot write short jmp from %p to %p\n", | ||
(void *)from, (void *)target); | ||
InterceptionFailed(); | ||
} | ||
*(u8*)from = 0xEB; | ||
*(u8*)(from + 1) = (u8)offset; | ||
} | ||
|
@@ -340,32 +347,78 @@ struct TrampolineMemoryRegion { | |
uptr max_size; | ||
}; | ||
|
||
UNUSED static const uptr kTrampolineScanLimitRange = 1ull << 31; // 2 gig | ||
UNUSED static const uptr kTrampolineRangeLimit = 1ull << 31; // 2 gig | ||
static const int kMaxTrampolineRegion = 1024; | ||
static TrampolineMemoryRegion TrampolineRegions[kMaxTrampolineRegion]; | ||
|
||
static void *AllocateTrampolineRegion(uptr image_address, size_t granularity) { | ||
#if SANITIZER_WINDOWS64 | ||
uptr address = image_address; | ||
uptr scanned = 0; | ||
while (scanned < kTrampolineScanLimitRange) { | ||
static void *AllocateTrampolineRegion(uptr min_addr, uptr max_addr, | ||
uptr func_addr, size_t granularity) { | ||
# if SANITIZER_WINDOWS64 | ||
// Clamp {min,max}_addr to the accessible address space. | ||
SYSTEM_INFO system_info; | ||
::GetSystemInfo(&system_info); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The call site here calls There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed that it shouldn't matter for performance. I'm not sure global variables would be simpler. Both I'd like to leave this as-is for now, and would be happy to follow up if folks have ideas for simplification. |
||
uptr min_virtual_addr = | ||
RoundUpTo((uptr)system_info.lpMinimumApplicationAddress, granularity); | ||
uptr max_virtual_addr = | ||
RoundDownTo((uptr)system_info.lpMaximumApplicationAddress, granularity); | ||
if (min_addr < min_virtual_addr) | ||
min_addr = min_virtual_addr; | ||
if (max_addr > max_virtual_addr) | ||
max_addr = max_virtual_addr; | ||
|
||
// This loop probes the virtual address space to find free memory in the | ||
// [min_addr, max_addr] interval. The search starts from func_addr and | ||
// proceeds "outwards" towards the interval bounds using two probes, lo_addr | ||
// and hi_addr, for addresses lower/higher than func_addr. At each step, it | ||
// considers the probe closest to func_addr. If that address is not free, the | ||
// probe is advanced (lower or higher depending on the probe) to the next | ||
// memory block and the search continues. | ||
uptr lo_addr = RoundDownTo(func_addr, granularity); | ||
uptr hi_addr = RoundUpTo(func_addr, granularity); | ||
while (lo_addr >= min_addr || hi_addr <= max_addr) { | ||
// Consider the in-range address closest to func_addr. | ||
uptr addr; | ||
if (lo_addr < min_addr) | ||
addr = hi_addr; | ||
else if (hi_addr > max_addr) | ||
addr = lo_addr; | ||
else | ||
addr = (hi_addr - func_addr < func_addr - lo_addr) ? hi_addr : lo_addr; | ||
|
||
MEMORY_BASIC_INFORMATION info; | ||
if (!::VirtualQuery((void*)address, &info, sizeof(info))) | ||
if (!::VirtualQuery((void *)addr, &info, sizeof(info))) { | ||
ReportError( | ||
"interception_win: VirtualQuery in AllocateTrampolineRegion failed " | ||
"for %p\n", | ||
(void *)addr); | ||
return nullptr; | ||
} | ||
|
||
// Check whether a region can be allocated at |address|. | ||
// Check whether a region can be allocated at |addr|. | ||
if (info.State == MEM_FREE && info.RegionSize >= granularity) { | ||
void *page = ::VirtualAlloc((void*)RoundUpTo(address, granularity), | ||
granularity, | ||
MEM_RESERVE | MEM_COMMIT, | ||
PAGE_EXECUTE_READWRITE); | ||
void *page = | ||
::VirtualAlloc((void *)addr, granularity, MEM_RESERVE | MEM_COMMIT, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Super nit: There is a race here between VirtualAlloc and VirtualQuery. Nothing prevents other threads (or processes) from tampering with the virtual address space during our search process. If we want this loop to be bulletproof, we would want some kind of retry case here. I think that's probably unnecessary, but this seems like a good place to insert a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, I didn't think about that. Added a ReportError call. |
||
PAGE_EXECUTE_READWRITE); | ||
if (page == nullptr) | ||
ReportError( | ||
"interception_win: VirtualAlloc in AllocateTrampolineRegion failed " | ||
"for %p\n", | ||
(void *)addr); | ||
return page; | ||
} | ||
|
||
// Move to the next region. | ||
address = (uptr)info.BaseAddress + info.RegionSize; | ||
scanned += info.RegionSize; | ||
if (addr == lo_addr) | ||
lo_addr = | ||
RoundDownTo((uptr)info.AllocationBase - granularity, granularity); | ||
if (addr == hi_addr) | ||
hi_addr = | ||
RoundUpTo((uptr)info.BaseAddress + info.RegionSize, granularity); | ||
} | ||
|
||
ReportError( | ||
"interception_win: AllocateTrampolineRegion failed to find free memory; " | ||
"min_addr: %p, max_addr: %p, func_addr: %p, granularity: %zu\n", | ||
(void *)min_addr, (void *)max_addr, granularity); | ||
return nullptr; | ||
#else | ||
return ::VirtualAlloc(nullptr, | ||
|
@@ -387,37 +440,50 @@ void TestOnlyReleaseTrampolineRegions() { | |
} | ||
|
||
static uptr AllocateMemoryForTrampoline(uptr func_address, size_t size) { | ||
uptr image_address = func_address; | ||
# if SANITIZER_WINDOWS64 | ||
uptr min_addr = func_address - kTrampolineRangeLimit; | ||
uptr max_addr = func_address + kTrampolineRangeLimit - size; | ||
|
||
#if SANITIZER_WINDOWS64 | ||
// Allocate memory after the module (DLL or EXE file), but within 2GB | ||
// of the start of the module so that any address within the module can be | ||
// referenced with PC-relative operands. | ||
// Allocate memory within 2GB of the module (DLL or EXE file) so that any | ||
// address within the module can be referenced with PC-relative operands. | ||
// This allows us to not just jump to the trampoline with a PC-relative | ||
// offset, but to relocate any instructions that we copy to the trampoline | ||
// which have references to the original module. If we can't find the base | ||
// address of the module (e.g. if func_address is in mmap'ed memory), just | ||
// use func_address as is. | ||
// stay within 2GB of func_address. | ||
HMODULE module; | ||
if (::GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | | ||
GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, | ||
(LPCWSTR)func_address, &module)) { | ||
MODULEINFO module_info; | ||
if (::GetModuleInformation(::GetCurrentProcess(), module, | ||
&module_info, sizeof(module_info))) { | ||
image_address = (uptr)module_info.lpBaseOfDll; | ||
min_addr = (uptr)module_info.lpBaseOfDll + module_info.SizeOfImage - | ||
kTrampolineRangeLimit; | ||
max_addr = (uptr)module_info.lpBaseOfDll + kTrampolineRangeLimit - size; | ||
} | ||
} | ||
#endif | ||
|
||
// Find a region within 2G with enough space to allocate |size| bytes. | ||
// Check for overflow. | ||
if (min_addr > func_address) | ||
min_addr = 0; | ||
if (max_addr < func_address) | ||
max_addr = ~(uptr)0; | ||
# else | ||
uptr min_addr = 0; | ||
uptr max_addr = ~min_addr; | ||
# endif | ||
|
||
// Find a region within [min_addr,max_addr] with enough space to allocate | ||
// |size| bytes. | ||
TrampolineMemoryRegion *region = nullptr; | ||
for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) { | ||
TrampolineMemoryRegion* current = &TrampolineRegions[bucket]; | ||
if (current->content == 0) { | ||
// No valid region found, allocate a new region. | ||
size_t bucket_size = GetMmapGranularity(); | ||
void *content = AllocateTrampolineRegion(image_address, bucket_size); | ||
void *content = AllocateTrampolineRegion(min_addr, max_addr, func_address, | ||
bucket_size); | ||
if (content == nullptr) | ||
return 0U; | ||
|
||
|
@@ -427,13 +493,9 @@ static uptr AllocateMemoryForTrampoline(uptr func_address, size_t size) { | |
region = current; | ||
break; | ||
} else if (current->max_size - current->allocated_size > size) { | ||
#if SANITIZER_WINDOWS64 | ||
// In 64-bits, the memory space must be allocated within 2G boundary. | ||
uptr next_address = current->content + current->allocated_size; | ||
if (next_address < image_address || | ||
next_address - image_address >= 0x7FFF0000) | ||
continue; | ||
#endif | ||
uptr next_address = current->content + current->allocated_size; | ||
if (next_address < min_addr || next_address > max_addr) | ||
continue; | ||
// The space can be allocated in the current region. | ||
region = current; | ||
break; | ||
|
@@ -870,8 +932,14 @@ static bool CopyInstructions(uptr to, uptr from, size_t size) { | |
// this will be untrue if relocated_offset \notin [-2**31, 2**31) | ||
s64 delta = to - from; | ||
s64 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta; | ||
if (-0x8000'0000ll > relocated_offset || relocated_offset > 0x7FFF'FFFFll) | ||
if (-0x8000'0000ll > relocated_offset || | ||
relocated_offset > 0x7FFF'FFFFll) { | ||
ReportError( | ||
"interception_win: CopyInstructions relocated_offset %lld outside " | ||
"32-bit range\n", | ||
(long long)relocated_offset); | ||
return false; | ||
} | ||
# else | ||
// on 32-bit, the relative offset will always be correct | ||
s32 delta = to - from; | ||
|
@@ -1165,19 +1233,27 @@ uptr InternalGetProcAddress(void *module, const char *func_name) { | |
// exported directory. | ||
char function_name[256]; | ||
size_t funtion_name_length = _strlen(func); | ||
if (funtion_name_length >= sizeof(function_name) - 1) | ||
if (funtion_name_length >= sizeof(function_name) - 1) { | ||
ReportError("interception_win: func too long: '%s'\n", func); | ||
InterceptionFailed(); | ||
} | ||
|
||
_memcpy(function_name, func, funtion_name_length); | ||
function_name[funtion_name_length] = '\0'; | ||
char* separator = _strchr(function_name, '.'); | ||
if (!separator) | ||
if (!separator) { | ||
ReportError("interception_win: no separator in '%s'\n", | ||
function_name); | ||
InterceptionFailed(); | ||
} | ||
*separator = '\0'; | ||
|
||
void* redirected_module = GetModuleHandleA(function_name); | ||
if (!redirected_module) | ||
if (!redirected_module) { | ||
ReportError("interception_win: GetModuleHandleA failed for '%s'\n", | ||
function_name); | ||
InterceptionFailed(); | ||
} | ||
return InternalGetProcAddress(redirected_module, separator + 1); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this deserves a significant doc comment. The loop below probes the address space in a search that alternates between forward probing and backwards probing, and that makes the logic fairly hard to follow. I'd stop short of ASCII art, but a text list like this would help:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a comment that should make it more clear.
It doesn't go as far as providing a concrete example like you suggested, because the order would depend on the addresses, and at that point I think the example would be more complex than it's worth.