-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[libc][bug] Fix out of bound write in memcpy w/ software prefetching #90591
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-libc Author: Guillaume Chatelet (gchatelet) ChangesFull diff: https://github.com/llvm/llvm-project/pull/90591.diff 4 Files Affected:
diff --git a/libc/fuzzing/string/CMakeLists.txt b/libc/fuzzing/string/CMakeLists.txt
index 9dd4fceee3b596..6147ebfcb5013f 100644
--- a/libc/fuzzing/string/CMakeLists.txt
+++ b/libc/fuzzing/string/CMakeLists.txt
@@ -25,6 +25,22 @@ add_libc_fuzzer(
libc.src.string.strlen
)
+add_libc_fuzzer(
+ memcpy_fuzz
+ SRCS
+ memcpy_fuzz.cpp
+ DEPENDS
+ libc.src.string.memcpy
+)
+
+add_libc_fuzzer(
+ memset_fuzz
+ SRCS
+ memset_fuzz.cpp
+ DEPENDS
+ libc.src.string.memset
+)
+
add_libc_fuzzer(
memcmp_fuzz
SRCS
diff --git a/libc/fuzzing/string/memcpy_fuzz.cpp b/libc/fuzzing/string/memcpy_fuzz.cpp
new file mode 100644
index 00000000000000..f9d98613cd7e23
--- /dev/null
+++ b/libc/fuzzing/string/memcpy_fuzz.cpp
@@ -0,0 +1,55 @@
+//===-- memcpy_fuzz.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Fuzzing test for llvm-libc memcpy implementation.
+///
+//===----------------------------------------------------------------------===//
+#include "protected_pages.h"
+#include "src/string/memcpy.h"
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t
+#include <stdlib.h> // rand
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
+ static constexpr size_t MAX_SIZE = 1024;
+ static ProtectedPages regions;
+ static const Page write_buffer = regions.GetPageA().WithAccess(PROT_WRITE);
+ static const Page read_buffer = [&]() {
+ // We fetch page B in write mode.
+ auto region = regions.GetPageB().WithAccess(PROT_WRITE);
+ // And fill it with random numbers.
+ for (size_t i = 0; i < region.page_size; ++i)
+ region.page_ptr[i] = rand();
+ // Then return it in read mode.
+ return region.WithAccess(PROT_READ);
+ }();
+ // We fill 'size' with data coming from lib_fuzzer, this limits exploration to
+ // 2 bytes.
+ uint16_t size = 0;
+ if (data_size != sizeof(size))
+ return 0;
+ __builtin_memcpy(&size, data, sizeof(size));
+ if (size >= MAX_SIZE || size >= GetPageSize())
+ return 0;
+ // We cross-check the function from two sources and two destinations.
+ // The first of them (bottom) is always page aligned.
+ // The second one (top) is not necessarily aligned.
+ // Both sources and destinations are checked for out of bound accesses.
+ const uint8_t *sources[2] = {read_buffer.bottom(size), read_buffer.top(size)};
+ uint8_t *destinations[2] = {write_buffer.bottom(size),
+ write_buffer.top(size)};
+ for (const uint8_t *src : sources) {
+ for (uint8_t *dst : destinations) {
+ LIBC_NAMESPACE::memcpy(dst, src, size);
+ for (size_t i = 0; i < size; ++i)
+ if (src[i] != dst[i])
+ __builtin_trap();
+ }
+ }
+ return 0;
+}
diff --git a/libc/fuzzing/string/memset_fuzz.cpp b/libc/fuzzing/string/memset_fuzz.cpp
new file mode 100644
index 00000000000000..445bb7b6fa2d6b
--- /dev/null
+++ b/libc/fuzzing/string/memset_fuzz.cpp
@@ -0,0 +1,44 @@
+//===-- memset_fuzz.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Fuzzing test for llvm-libc memcset implementation.
+///
+//===----------------------------------------------------------------------===//
+#include "protected_pages.h"
+#include "src/string/memset.h"
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
+ static constexpr size_t MAX_SIZE = 1024;
+ static ProtectedPages regions;
+ static const Page write_buffer = regions.GetPageA().WithAccess(PROT_WRITE);
+ // We fill 'size' with data coming from lib_fuzzer, this limits exploration to
+ // 2 bytes.
+ uint16_t size = 0;
+ uint8_t fill_char = 0;
+ if (data_size != sizeof(size) + sizeof(fill_char))
+ return 0;
+ __builtin_memcpy(&size, data, sizeof(size));
+ __builtin_memcpy(&fill_char, data + sizeof(size), sizeof(fill_char));
+ if (size >= MAX_SIZE || size >= GetPageSize())
+ return 0;
+ // We cross-check the function from two sources and two destinations.
+ // The first of them (bottom) is always page aligned.
+ // The second one (top) is not necessarily aligned.
+ // Both sources and destinations are checked for out of bound accesses.
+ uint8_t *destinations[2] = {write_buffer.bottom(size),
+ write_buffer.top(size)};
+ for (uint8_t *dst : destinations) {
+ LIBC_NAMESPACE::memset(dst, fill_char, size);
+ for (size_t i = 0; i < size; ++i)
+ if (dst[i] != fill_char)
+ __builtin_trap();
+ }
+ return 0;
+}
diff --git a/libc/fuzzing/string/protected_pages.h b/libc/fuzzing/string/protected_pages.h
new file mode 100644
index 00000000000000..fa09f9ac7947f1
--- /dev/null
+++ b/libc/fuzzing/string/protected_pages.h
@@ -0,0 +1,74 @@
+#ifndef LIBC_FUZZING_STRING_PROTECTED_PAGES_H
+#define LIBC_FUZZING_STRING_PROTECTED_PAGES_H
+
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t
+#include <sys/mman.h> // mmap, munmap
+#include <unistd.h> // sysconf, _SC_PAGESIZE
+
+// Returns mmap page size.
+size_t GetPageSize() { return sysconf(_SC_PAGESIZE); }
+
+// Represents a page of memory which access can be configured throught the
+// 'WithAccess' function. Accessing data above or below this page will trap as
+// it is sandwiched between two pages with no read / write access.
+struct Page {
+ // Returns an aligned pointer that can be accessed up to page_size. Accessing
+ // data at ptr[-1] will fault.
+ uint8_t *bottom(size_t size) const {
+ if (size >= page_size)
+ __builtin_trap();
+ return page_ptr;
+ }
+ // Returns a pointer to a buffer that can be accessed up to size. Accessing
+ // data at ptr[size] will fault.
+ uint8_t *top(size_t size) const { return page_ptr + page_size - size; }
+
+ Page &WithAccess(int protection) {
+ if (mprotect(page_ptr, page_size, protection) != 0)
+ __builtin_trap();
+ return *this;
+ }
+
+ const size_t page_size;
+ uint8_t *const page_ptr;
+};
+
+// Allocates 5 consecutive pages that will trap if accessed.
+// +-----------------+
+// | page 0 (FAULT) |
+// | page 1 (CUSTOM) |
+// | page 2 (FAULT) |
+// | page 3 (CUSTOM) |
+// | page 4 (FAULT) |
+// +-----------------+
+// The pages 1 and 3 can be retrieved as with 'GetPageA' / 'GetPageB' and their
+// accesses can be customized through the 'WithAccess' function.
+struct ProtectedPages {
+ static constexpr size_t PAGES = 5;
+
+ ProtectedPages()
+ : page_size(GetPageSize()),
+ ptr(mmap(/*address*/ nullptr, /*length*/ PAGES * page_size,
+ /*protection*/ PROT_NONE,
+ /*flags*/ MAP_PRIVATE | MAP_ANONYMOUS, /*fd*/ -1,
+ /*offset*/ 0)) {
+ if (reinterpret_cast<intptr_t>(ptr) == -1)
+ __builtin_trap();
+ }
+ ~ProtectedPages() { munmap(ptr, PAGES * page_size); }
+
+ Page GetPageA() const { return Page{page_size, page<1>()}; }
+ Page GetPageB() const { return Page{page_size, page<3>()}; }
+
+private:
+ template <size_t index> uint8_t *page() const {
+ static_assert(index < PAGES);
+ return static_cast<uint8_t *>(ptr) + (index * page_size);
+ }
+
+ const size_t page_size;
+ void *const ptr = nullptr;
+};
+
+#endif // LIBC_FUZZING_STRING_PROTECTED_PAGES_H
|
libc/fuzzing/string/memset_fuzz.cpp
Outdated
|
||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) { | ||
static constexpr size_t MAX_SIZE = 1024; | ||
static ProtectedPages pages; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think using msan should catch these without having to write a custom class and make the fuzz test clearer. It should be as simple as __msan_poison
the dst (to check we have no spurious reads) and _msan_unpoison
the src (to allow reads).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So for a reason that I don't understand it seems that passing -fsanitize=memory
globally does not instrument the memcpy
code. As a consequence, I couldn't get the sanitizer to find the bug.
The compiled memcpy
library contains a symbol for sanitizer initialization but the memcpy
code is left un-instrumented.
Having all libc functions instrumented is one of the goals of the project so we definitely need to find the root cause.
In the meantime, I'm willing to submit this code as-is if you don't mind as it's a reproducer for a bug in memcpy
with software prefetching when AVX
is enabled.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it have something to do with the function name?
This bug showed up when running fuzzers newly added fuzzers llvm#90591.
So as discussed offline, I turned the fuzzers into tests and fixed the bug in this patch to avoid breaking the build bots. |
The last presubmit run is indeed running the tests
|
memcpy
and memset
This patch adds tests for
memcpy
andmemset
making sure that we don't access buffers out of bounds. It relies on POSIXmmap
/mprotect
and works only when FULL_BUILD_MODE is disabled.The bug showed up while enabling software prefetching.
loop_and_tail_offset
is always running at least one iteration but in some configurations loop unrolled prefetching was actually needing only the tail operation and no loop iterations at all.