Skip to content

Commit 0e5ff62

Browse files
[libc] add hashtable fuzzing (#87949)
1 parent 38f9c01 commit 0e5ff62

File tree

4 files changed

+221
-7
lines changed

4 files changed

+221
-7
lines changed

libc/fuzzing/__support/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,21 @@ add_libc_fuzzer(
55
DEPENDS
66
libc.src.__support.big_int
77
)
8+
9+
add_libc_fuzzer(
10+
hashtable_fuzz
11+
SRCS
12+
hashtable_fuzz.cpp
13+
DEPENDS
14+
libc.src.__support.HashTable.table
15+
)
16+
17+
add_libc_fuzzer(
18+
hashtable_opt_fuzz
19+
SRCS
20+
hashtable_fuzz.cpp
21+
DEPENDS
22+
libc.src.__support.HashTable.table
23+
COMPILE_OPTIONS
24+
-D__LIBC_EXPLICIT_SIMD_OPT
25+
)
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
//===-- hashtable_fuzz.cpp ------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// Fuzzing test for llvm-libc hashtable implementations.
10+
///
11+
//===----------------------------------------------------------------------===//
12+
#include "include/llvm-libc-types/ENTRY.h"
13+
#include "src/__support/CPP/string_view.h"
14+
#include "src/__support/HashTable/table.h"
15+
16+
namespace LIBC_NAMESPACE {
17+
18+
// A fuzzing payload starts with
19+
// - uint16_t: initial capacity for table A
20+
// - uint64_t: seed for table A
21+
// - uint16_t: initial capacity for table B
22+
// - uint64_t: seed for table B
23+
// Followed by a sequence of actions:
24+
// - CrossCheck: only a single byte valued (4 mod 5)
25+
// - Find: a single byte valued (3 mod 5) followed by a null-terminated string
26+
// - Insert: a single byte valued (0,1,2 mod 5) followed by a null-terminated
27+
// string
28+
// Number of bytes taken by the payload header: one uint16_t capacity and one
// uint64_t seed for each of the two tables.
static constexpr size_t INITIAL_HEADER_SIZE =
    sizeof(uint16_t) * 2 + sizeof(uint64_t) * 2;
30+
extern "C" size_t LLVMFuzzerMutate(uint8_t *data, size_t size, size_t max_size);
31+
extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *data, size_t size,
32+
size_t max_size, unsigned int seed) {
33+
size = LLVMFuzzerMutate(data, size, max_size);
34+
// not enough to read the initial capacities and seeds
35+
if (size < INITIAL_HEADER_SIZE)
36+
return 0;
37+
38+
// skip the initial capacities and seeds
39+
size_t i = INITIAL_HEADER_SIZE;
40+
while (i < size) {
41+
// cross check
42+
if (static_cast<uint8_t>(data[i]) % 5 == 4) {
43+
// skip the cross check byte
44+
++i;
45+
continue;
46+
}
47+
48+
// find or insert
49+
// check if there is enough space for the action byte and the
50+
// null-terminator
51+
if (i + 2 >= max_size)
52+
return i;
53+
// skip the action byte
54+
++i;
55+
// skip the null-terminated string
56+
while (i < max_size && data[i] != 0)
57+
++i;
58+
// in the case the string is not null-terminated, null-terminate it
59+
if (i == max_size && data[i - 1] != 0) {
60+
data[i - 1] = 0;
61+
return max_size;
62+
}
63+
64+
// move to the next action
65+
++i;
66+
}
67+
// return the new size
68+
return i;
69+
}
70+
71+
// a tagged union
72+
struct Action {
73+
enum class Tag { Find, Insert, CrossCheck } tag;
74+
cpp::string_view key;
75+
};
76+
77+
static struct {
78+
size_t remaining;
79+
const char *buffer;
80+
81+
template <typename T> T next() {
82+
static_assert(cpp::is_integral<T>::value, "T must be an integral type");
83+
union {
84+
T result;
85+
char data[sizeof(T)];
86+
};
87+
for (size_t i = 0; i < sizeof(result); i++)
88+
data[i] = buffer[i];
89+
buffer += sizeof(result);
90+
remaining -= sizeof(result);
91+
return result;
92+
}
93+
94+
cpp::string_view next_string() {
95+
cpp::string_view result(buffer);
96+
buffer = result.end() + 1;
97+
remaining -= result.size() + 1;
98+
return result;
99+
}
100+
101+
Action next_action() {
102+
uint8_t byte = next<uint8_t>();
103+
switch (byte % 5) {
104+
case 4:
105+
return {Action::Tag::CrossCheck, {}};
106+
case 3:
107+
return {Action::Tag::Find, next_string()};
108+
default:
109+
return {Action::Tag::Insert, next_string()};
110+
}
111+
}
112+
} global_status;
113+
114+
class HashTable {
115+
internal::HashTable *table;
116+
117+
public:
118+
HashTable(uint64_t size, uint64_t seed)
119+
: table(internal::HashTable::allocate(size, seed)) {}
120+
HashTable(internal::HashTable *table) : table(table) {}
121+
~HashTable() { internal::HashTable::deallocate(table); }
122+
HashTable(HashTable &&other) : table(other.table) { other.table = nullptr; }
123+
bool is_valid() const { return table != nullptr; }
124+
ENTRY *find(const char *key) { return table->find(key); }
125+
ENTRY *insert(const ENTRY &entry) {
126+
return internal::HashTable::insert(this->table, entry);
127+
}
128+
using iterator = internal::HashTable::iterator;
129+
iterator begin() const { return table->begin(); }
130+
iterator end() const { return table->end(); }
131+
};
132+
133+
// Builds a table from the next header record of the payload: a uint16_t
// initial capacity followed by a uint64_t seed, consumed in that order.
HashTable next_hashtable() {
  const size_t capacity = global_status.next<uint16_t>();
  const uint64_t hash_seed = global_status.next<uint64_t>();
  return HashTable(capacity, hash_seed);
}
138+
139+
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
140+
global_status.buffer = reinterpret_cast<const char *>(data);
141+
global_status.remaining = size;
142+
if (global_status.remaining < INITIAL_HEADER_SIZE)
143+
return 0;
144+
145+
HashTable table_a = next_hashtable();
146+
HashTable table_b = next_hashtable();
147+
for (;;) {
148+
if (global_status.remaining == 0)
149+
break;
150+
Action action = global_status.next_action();
151+
switch (action.tag) {
152+
case Action::Tag::Find: {
153+
if (static_cast<bool>(table_a.find(action.key.data())) !=
154+
static_cast<bool>(table_b.find(action.key.data())))
155+
__builtin_trap();
156+
break;
157+
}
158+
case Action::Tag::Insert: {
159+
char *ptr = const_cast<char *>(action.key.data());
160+
ENTRY *a = table_a.insert(ENTRY{ptr, ptr});
161+
ENTRY *b = table_b.insert(ENTRY{ptr, ptr});
162+
if (a->data != b->data)
163+
__builtin_trap();
164+
break;
165+
}
166+
case Action::Tag::CrossCheck: {
167+
for (ENTRY a : table_a)
168+
if (const ENTRY *b = table_b.find(a.key); a.data != b->data)
169+
__builtin_trap();
170+
171+
for (ENTRY b : table_b)
172+
if (const ENTRY *a = table_a.find(b.key); a->data != b.data)
173+
__builtin_trap();
174+
175+
break;
176+
}
177+
}
178+
}
179+
return 0;
180+
}
181+
182+
} // namespace LIBC_NAMESPACE

libc/fuzzing/__support/uint_fuzz.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
//===-- uint_fuzz.cpp -----------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// Fuzzing test for llvm-libc unsigned integer utilities.
10+
///
11+
//===----------------------------------------------------------------------===//
112
#include "src/__support/CPP/bit.h"
213
#include "src/__support/big_int.h"
314
#include "src/string/memory_utils/inline_memcpy.h"

libc/src/__support/HashTable/generic/bitmask_impl.inc

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,11 @@ LIBC_INLINE constexpr bitmask_t repeat_byte(bitmask_t byte) {
3434
return byte;
3535
}
3636

37-
using BitMask = BitMaskAdaptor<bitmask_t, 0x8ull>;
37+
using BitMask = BitMaskAdaptor<bitmask_t, 0x8ul>;
3838
using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
3939

4040
struct Group {
41+
LIBC_INLINE_VAR static constexpr bitmask_t MASK = repeat_byte(0x80ul);
4142
bitmask_t data;
4243

4344
// Load a group of control words from an arbitrary address.
@@ -100,21 +101,23 @@ struct Group {
100101
// - The check for key equality will catch these.
101102
// - This only happens if there is at least 1 true match.
102103
// - The chance of this happening is very low (< 1% chance per byte).
103-
auto cmp = data ^ repeat_byte(byte);
104-
auto result = LIBC_NAMESPACE::Endian::to_little_endian(
105-
(cmp - repeat_byte(0x01)) & ~cmp & repeat_byte(0x80));
104+
static constexpr bitmask_t ONES = repeat_byte(0x01ul);
105+
auto cmp = data ^ repeat_byte(static_cast<bitmask_t>(byte) & 0xFFul);
106+
auto result =
107+
LIBC_NAMESPACE::Endian::to_little_endian((cmp - ONES) & ~cmp & MASK);
106108
return {BitMask{result}};
107109
}
108110

109111
// Find out the lanes equal to EMPTY or DELETE (highest bit set) and
110112
// return the bitmask with corresponding bits set.
111113
LIBC_INLINE BitMask mask_available() const {
112-
return {LIBC_NAMESPACE::Endian::to_little_endian(data) & repeat_byte(0x80)};
114+
bitmask_t le_data = LIBC_NAMESPACE::Endian::to_little_endian(data);
115+
return {le_data & MASK};
113116
}
114117

115118
LIBC_INLINE IteratableBitMask occupied() const {
116-
return {
117-
{static_cast<bitmask_t>(mask_available().word ^ repeat_byte(0x80))}};
119+
bitmask_t available = mask_available().word;
120+
return {BitMask{available ^ MASK}};
118121
}
119122
};
120123
} // namespace internal

0 commit comments

Comments
 (0)