
Commit 2028474

[libc][malloc] Reuse the prev_ field for allocated blocks
This applies a standard trick from Knuth for storing boundary tags with only one word of overhead for allocated blocks. The prev_ field is now only valid if the previous block is free. This is safe, since only coalescing with a free node requires walking the blocks backwards.

To allow determining whether it's safe to traverse backwards, the used flag is changed to a prev_free flag. Since it's still possible to unconditionally traverse forward, the prev_free flag of the next block can be used wherever the old used flag was, so long as there is always a next block. To ensure there is always a next block, a sentinel last block is added at the end of the range of blocks.

Due to the above, this costs only a single word per heap: the sentinel essentially just stores whether the last real block of the heap is free. The sentinel is always considered used and has an inner size of zero.

This completes the block optimizations needed to address llvm#98086. The block structure should now be size-competitive with dlmalloc, although there are still a couple of broader fragmentation concerns to address.
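The trick is easiest to see in isolation. Below is a minimal, self-contained model of the one-word boundary tag, not the Block class from this patch: the struct name, the field names (prev_outer_size, next_raw), and the fixed 32-bit offsets are illustrative assumptions.

// Minimal model of a one-word boundary tag; see libc/src/__support/block.h
// for the real implementation. All names here are illustrative.
#include <cassert>
#include <cstdint>
#include <new>

struct BlockHeader {
  // Boundary tag: only meaningful while the *previous* block is free.
  // Otherwise this word belongs to the previous block's usable space.
  uint32_t prev_outer_size;
  // Outer size of this block; bit 0 records whether the previous block is
  // free (the "prev_free" flag).
  uint32_t next_raw;

  static constexpr uint32_t PREV_FREE = 1u << 0;
  static constexpr uint32_t SIZE_MASK = ~uint32_t{3};

  uint32_t outer_size() const { return next_raw & SIZE_MASK; }
  BlockHeader *next() {
    return reinterpret_cast<BlockHeader *>(reinterpret_cast<char *>(this) +
                                           outer_size());
  }
  // A block's own free/used state lives in its successor's prev_free bit.
  bool used() { return !(next()->next_raw & PREV_FREE); }
  void mark_used() { next()->next_raw &= ~PREV_FREE; }
  void mark_free() {
    next()->next_raw |= PREV_FREE;
    next()->prev_outer_size = outer_size(); // The boundary tag becomes live.
  }
  // Walking backwards is only legal when the previous block is free.
  BlockHeader *prev_free() {
    if (!(next_raw & PREV_FREE))
      return nullptr;
    return reinterpret_cast<BlockHeader *>(reinterpret_cast<char *>(this) -
                                           prev_outer_size);
  }
};

int main() {
  alignas(4) char heap[64];
  // One real block followed by a sentinel, as in this commit's init().
  auto *block = new (heap) BlockHeader{0, 32};
  auto *sentinel = new (heap + 32) BlockHeader{0, 0};
  block->mark_free();
  assert(!block->used() && sentinel->prev_free() == block);
  block->mark_used(); // Backward traversal is now forbidden...
  assert(block->used() && sentinel->prev_free() == nullptr);
  // ...and sentinel->prev_outer_size may be reused as block's user data.
}

Note how used() reads the successor's flag rather than the block's own header; this is exactly why the sentinel is needed, so that next() is valid for every real block.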
1 parent 8dd5742 commit 2028474

5 files changed: +222 −298 lines

libc/src/__support/block.h

Lines changed: 112 additions & 118 deletions
@@ -95,6 +95,26 @@ using cpp::optional;
 /// +----------+----------+--------------+
 /// @endcode
 ///
+/// As a space optimization, when a block is allocated, it consumes the prev
+/// field of the following block:
+///
+/// Block 1 (used):
+/// +---------------------+--------------+
+/// | Header              | Usable space |
+/// +----------+----------+--------------+
+/// | prev     | next     |              |
+/// | 0......3 | 4......7 | 8........230 |
+/// | 00000000 | 00000230 | <app data>   |
+/// +----------+----------+--------------+
+/// Block 2:
+/// +---------------------+--------------+
+/// | B1       | Header   | Usable space |
+/// +----------+----------+--------------+
+/// |          | next     |              |
+/// | 0......3 | 4......7 | 8........827 |
+/// | xxxxxxxx | 00000830 | f7f7....f7f7 |
+/// +----------+----------+--------------+
+///
 /// The next offset of a block matches the previous offset of its next block.
 /// The first block in a list is denoted by having a previous offset of `0`.
 ///
@@ -110,9 +130,9 @@ using cpp::optional;
 template <typename OffsetType = uintptr_t, size_t kAlign = alignof(max_align_t)>
 class Block {
   // Masks for the contents of the next_ field.
-  static constexpr size_t USED_MASK = 1 << 0;
+  static constexpr size_t PREV_FREE_MASK = 1 << 0;
   static constexpr size_t LAST_MASK = 1 << 1;
-  static constexpr size_t SIZE_MASK = ~(USED_MASK | LAST_MASK);
+  static constexpr size_t SIZE_MASK = ~(PREV_FREE_MASK | LAST_MASK);
 
 public:
   using offset_type = OffsetType;
@@ -126,7 +146,8 @@ class Block {
   Block(const Block &other) = delete;
   Block &operator=(const Block &other) = delete;
 
-  /// Creates the first block for a given memory region.
+  /// Creates the first block for a given memory region, followed by a sentinel
+  /// last block. Returns the first block.
   static optional<Block *> init(ByteSpan region);
 
   /// @returns A pointer to a `Block`, given a pointer to the start of the
@@ -149,7 +170,12 @@ class Block {
   size_t outer_size() const { return next_ & SIZE_MASK; }
 
   /// @returns The number of usable bytes inside the block.
-  size_t inner_size() const { return outer_size() - BLOCK_OVERHEAD; }
+  size_t inner_size() const {
+    if (!next())
+      return 0;
+    // The usable region includes the prev_ field of the next block.
+    return outer_size() - BLOCK_OVERHEAD + sizeof(prev_);
+  }
 
   /// @returns A pointer to the usable space inside this block.
   cpp::byte *usable_space() {
@@ -167,8 +193,9 @@ class Block {
   /// Attempts to split this block.
   ///
   /// If successful, the block will have an inner size of `new_inner_size`,
-  /// rounded up to a `ALIGNMENT` boundary. The remaining space will be
-  /// returned as a new block.
+  /// rounded to ensure that the split point is on an ALIGNMENT boundary. The
+  /// remaining space will be returned as a new block. Note that the prev_ field
+  /// of the next block counts as part of the inner size of the returned block.
   ///
   /// This method may fail if the remaining space is too small to hold a new
   /// block. If this method fails for any reason, the original block is
@@ -182,40 +209,39 @@ class Block {
   /// is the last block.
   Block *next() const;
 
-  /// @returns The block immediately before this one, or a null pointer if this
-  /// is the first block.
-  Block *prev() const;
+  /// @returns The free block immediately before this one, otherwise nullptr.
+  Block *prev_free() const;
 
-  /// Indicates whether the block is in use.
-  ///
-  /// @returns `true` if the block is in use or `false` if not.
-  bool used() const { return next_ & USED_MASK; }
+  /// @returns Whether the block is unavailable for allocation.
+  bool used() const { return !next() || !next()->prev_free(); }
 
   /// Marks this block as in use.
-  void mark_used() { next_ |= USED_MASK; }
+  void mark_used() {
+    LIBC_ASSERT(next() && "last block is always considered used");
+    next()->next_ &= ~PREV_FREE_MASK;
+  }
 
   /// Marks this block as free.
-  void mark_free() { next_ &= ~USED_MASK; }
+  void mark_free() {
+    LIBC_ASSERT(next() && "last block is always considered used");
+    next()->next_ |= PREV_FREE_MASK;
+    // The next block's prev_ field becomes alive, as it is no longer part of
+    // this block's used space.
+    *new (&next()->prev_) offset_type = outer_size();
+  }
 
   /// Marks this block as the last one in the chain. Makes next() return
   /// nullptr.
-  constexpr void mark_last() { next_ |= LAST_MASK; }
+  void mark_last() { next_ |= LAST_MASK; }
 
-  /// @brief Checks if a block is valid.
-  ///
-  /// @returns `true` if and only if the following conditions are met:
-  /// * The block is aligned.
-  /// * The prev/next fields match with the previous and next blocks.
-  bool is_valid() const {
-    return check_status() == internal::BlockStatus::VALID;
-  }
-
-  constexpr Block(size_t prev_outer_size, size_t outer_size);
+  constexpr Block(size_t outer_size);
 
   bool is_usable_space_aligned(size_t alignment) const {
     return reinterpret_cast<uintptr_t>(usable_space()) % alignment == 0;
   }
 
+  /// @returns The new inner size of this block that would give the usable
+  /// space of the next block the given alignment.
   size_t padding_for_alignment(size_t alignment) const {
     if (is_usable_space_aligned(alignment))
       return 0;
@@ -235,9 +261,11 @@ class Block {
     //                  ^
    //   Alignment requirement
    //
-    uintptr_t start = reinterpret_cast<uintptr_t>(usable_space());
     alignment = cpp::max(alignment, ALIGNMENT);
-    return align_up(start + BLOCK_OVERHEAD, alignment) - start;
+    uintptr_t start = reinterpret_cast<uintptr_t>(usable_space());
+    uintptr_t next_usable_space = align_up(start + BLOCK_OVERHEAD, alignment);
+    uintptr_t next_block = next_usable_space - BLOCK_OVERHEAD;
+    return next_block - start + sizeof(prev_);
   }
 
   // Check that we can `allocate` a block with a given alignment and size from
@@ -272,21 +300,16 @@ class Block {
 private:
   /// Construct a block to represent a span of bytes. Overwrites only enough
   /// memory for the block header; the rest of the span is left alone.
-  static Block *as_block(size_t prev_outer_size, ByteSpan bytes);
-
-  /// Returns a `BlockStatus` that is either VALID or indicates the reason why
-  /// the block is invalid.
-  ///
-  /// If the block is invalid at multiple points, this function will only return
-  /// one of the reasons.
-  internal::BlockStatus check_status() const;
+  static Block *as_block(ByteSpan bytes);
 
   /// Like `split`, but assumes the caller has already checked to parameters to
   /// ensure the split will succeed.
   Block *split_impl(size_t new_inner_size);
 
   /// Offset from this block to the previous block. 0 if this is the first
-  /// block.
+  /// block. This field is only alive when the previous block is free;
+  /// otherwise, its memory is reused as part of the previous block's usable
+  /// space.
   offset_type prev_ = 0;
 
   /// Offset from this block to the next block. Valid even if this is the last
@@ -296,14 +319,12 @@ class Block {
   /// Information about the current state of the block is stored in the two low
   /// order bits of the next_ value. These are guaranteed free by a minimum
   /// alignment (and thus, alignment of the size) of 4. The lowest bit is the
-  /// `used` flag, and the other bit is the `last` flag.
+  /// `prev_free` flag, and the other bit is the `last` flag.
   ///
-  /// * If the `used` flag is set, the block's usable memory has been allocated
-  ///   and is being used.
-  /// * If the `last` flag is set, the block does not have a next block.
-  /// * If the `used` flag is set, the alignment represents the requested value
-  ///   when the memory was allocated, which may be less strict than the actual
-  ///   alignment.
+  /// * If the `prev_free` flag is set, the block isn't the first and the
+  ///   previous block is free.
+  /// * If the `last` flag is set, the block is the sentinel last block. It is
+  ///   summarily considered used and has no next block.
 } __attribute__((packed, aligned(cpp::max(kAlign, size_t{4}))));
 
 // Public template method implementations.
@@ -332,29 +353,34 @@ Block<OffsetType, kAlign>::init(ByteSpan region) {
     return {};
 
   region = result.value();
-  if (region.size() < BLOCK_OVERHEAD)
+  if (region.size() < 2 * BLOCK_OVERHEAD)
     return {};
 
   if (cpp::numeric_limits<OffsetType>::max() < region.size())
     return {};
 
-  Block *block = as_block(0, region);
-  block->mark_last();
+  Block *block = as_block(region.first(region.size() - BLOCK_OVERHEAD));
+  Block *last = as_block(region.last(BLOCK_OVERHEAD));
+  block->mark_free();
+  last->mark_last();
   return block;
 }
 
 template <typename OffsetType, size_t kAlign>
 bool Block<OffsetType, kAlign>::can_allocate(size_t alignment,
                                              size_t size) const {
-  if (is_usable_space_aligned(alignment) && inner_size() >= size)
-    return true; // Size and alignment constraints met.
-
-  // Either the alignment isn't met or we don't have enough size.
-  // If we don't meet alignment, we can always adjust such that we do meet the
-  // alignment. If we meet the alignment but just don't have enough size. This
-  // check will fail anyway.
-  size_t adjustment = padding_for_alignment(alignment);
-  return inner_size() >= size + adjustment;
+  if (inner_size() < size)
+    return false;
+  if (is_usable_space_aligned(alignment))
+    return true;
+
+  // Alignment isn't met, so a padding block is needed. Determine amount of
+  // inner_size() consumed by the padding block.
+  size_t padding_size = padding_for_alignment(alignment) - sizeof(prev_);
+
+  // Check that there is room for the allocation in the following aligned block.
+  size_t aligned_inner_size = inner_size() - padding_size - BLOCK_OVERHEAD;
+  return size <= aligned_inner_size;
 }
 
 template <typename OffsetType, size_t kAlign>
@@ -369,26 +395,19 @@ Block<OffsetType, kAlign>::allocate(Block *block, size_t alignment,
   BlockInfo info{block, /*prev=*/nullptr, /*next=*/nullptr};
 
   if (!info.block->is_usable_space_aligned(alignment)) {
-    size_t adjustment = info.block->padding_for_alignment(alignment);
-    LIBC_ASSERT((adjustment - BLOCK_OVERHEAD) % ALIGNMENT == 0 &&
-                "The adjustment calculation should always return a new size "
-                "that's a multiple of ALIGNMENT");
-
     Block *original = info.block;
     optional<Block *> maybe_aligned_block =
-        original->split(adjustment - BLOCK_OVERHEAD);
+        original->split(info.block->padding_for_alignment(alignment));
     LIBC_ASSERT(maybe_aligned_block.has_value() &&
                 "This split should always result in a new block. The check in "
                 "`can_allocate` ensures that we have enough space here to make "
                 "two blocks.");
 
-    if (Block *prev = original->prev()) {
-      // If there is a block before this, we can merge the current one with the
+    if (Block *prev = original->prev_free()) {
+      // If there is a free block before this, we can merge the current one with the
       // newly created one.
       prev->merge_next();
     } else {
-      // Otherwise, this was the very first block in the chain. Now we can make
-      // it the new first block.
      info.prev = original;
    }
 
@@ -410,9 +429,14 @@ optional<Block<OffsetType, kAlign> *>
 Block<OffsetType, kAlign>::split(size_t new_inner_size) {
   if (used())
     return {};
+  // The prev_ field of the next block is always available, so there is a
+  // minimum size to a block created through splitting.
+  if (new_inner_size < sizeof(prev_))
+    return {};
 
   size_t old_inner_size = inner_size();
-  new_inner_size = align_up(new_inner_size, ALIGNMENT);
+  new_inner_size = align_up(new_inner_size - sizeof(prev_), ALIGNMENT) +
+                   sizeof(prev_);
   if (old_inner_size < new_inner_size)
     return {};
 
@@ -425,41 +449,26 @@ Block<OffsetType, kAlign>::split(size_t new_inner_size) {
 template <typename OffsetType, size_t kAlign>
 Block<OffsetType, kAlign> *
 Block<OffsetType, kAlign>::split_impl(size_t new_inner_size) {
-  size_t outer_size1 = new_inner_size + BLOCK_OVERHEAD;
-  bool has_next = next();
+  size_t outer_size1 = new_inner_size - sizeof(prev_) + BLOCK_OVERHEAD;
+  LIBC_ASSERT(outer_size1 % ALIGNMENT == 0 && "new size must be aligned");
   ByteSpan new_region = region().subspan(outer_size1);
-  LIBC_ASSERT(!used() && "used blocks cannot be split");
-  // The low order bits of outer_size1 should both be zero, and is the correct
-  // value for the flags is false.
-  next_ = outer_size1;
-  LIBC_ASSERT(!used() && next() && "incorrect first split flags");
-  Block *new_block = as_block(outer_size1, new_region);
-
-  if (has_next) {
-    // The two flags are both false, so next_ is a plain size.
-    LIBC_ASSERT(!new_block->used() && next() && "flags disrupt use of size");
-    new_block->next()->prev_ = new_block->next_;
-  } else {
-    new_block->mark_last();
-  }
+  next_ &= ~SIZE_MASK;
+  next_ |= outer_size1;
+
+  Block *new_block = as_block(new_region);
+  mark_free(); // Free status for this block is now stored in new_block.
+  new_block->next()->prev_ = new_region.size();
   return new_block;
 }
 
 template <typename OffsetType, size_t kAlign>
 bool Block<OffsetType, kAlign>::merge_next() {
-  if (used() || !next() || next()->used())
+  if (used() || next()->used())
     return false;
-
-  // Extend the size and copy the last() flag from the next block to this one.
-  next_ &= SIZE_MASK;
-  next_ += next()->next_;
-
-  if (next()) {
-    // The two flags are both false, so next_ is a plain size.
-    LIBC_ASSERT(!used() && next() && "flags disrupt use of size");
-    next()->prev_ = next_;
-  }
-
+  size_t new_size = outer_size() + next()->outer_size();
+  next_ &= ~SIZE_MASK;
+  next_ |= new_size;
+  next()->prev_ = new_size;
   return true;
 }
 
@@ -472,39 +481,24 @@ Block<OffsetType, kAlign> *Block<OffsetType, kAlign>::next() const {
 }
 
 template <typename OffsetType, size_t kAlign>
-Block<OffsetType, kAlign> *Block<OffsetType, kAlign>::prev() const {
-  uintptr_t addr = (prev_ == 0) ? 0 : reinterpret_cast<uintptr_t>(this) - prev_;
-  return reinterpret_cast<Block *>(addr);
+Block<OffsetType, kAlign> *Block<OffsetType, kAlign>::prev_free() const {
+  if (!(next_ & PREV_FREE_MASK))
+    return nullptr;
+  return reinterpret_cast<Block *>(reinterpret_cast<uintptr_t>(this) - prev_);
 }
 
 // Private template method implementations.
 
 template <typename OffsetType, size_t kAlign>
-constexpr Block<OffsetType, kAlign>::Block(size_t prev_outer_size,
-                                           size_t outer_size) {
-  prev_ = prev_outer_size;
+constexpr Block<OffsetType, kAlign>::Block(size_t outer_size)
+    : next_(outer_size) {
   LIBC_ASSERT(outer_size % ALIGNMENT == 0 && "block sizes must be aligned");
-  next_ = outer_size;
 }
 
 template <typename OffsetType, size_t kAlign>
 Block<OffsetType, kAlign> *
-Block<OffsetType, kAlign>::as_block(size_t prev_outer_size, ByteSpan bytes) {
-  return ::new (bytes.data()) Block(prev_outer_size, bytes.size());
-}
-
-template <typename OffsetType, size_t kAlign>
-internal::BlockStatus Block<OffsetType, kAlign>::check_status() const {
-  if (reinterpret_cast<uintptr_t>(this) % ALIGNMENT != 0)
-    return internal::BlockStatus::MISALIGNED;
-
-  if (next() && (this >= next() || this != next()->prev()))
-    return internal::BlockStatus::NEXT_MISMATCHED;
-
-  if (prev() && (this <= prev() || this != prev()->next()))
-    return internal::BlockStatus::PREV_MISMATCHED;
-
-  return internal::BlockStatus::VALID;
+Block<OffsetType, kAlign>::as_block(ByteSpan bytes) {
+  return ::new (bytes.data()) Block(bytes.size());
 }
 
 } // namespace LIBC_NAMESPACE_DECL
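To double-check the "single word per heap" claim in the commit message, here is the accounting as a compile-time sketch. It assumes a 4-byte offset_type so that BLOCK_OVERHEAD is two words; kOffset, kOverhead, and inner_size are illustrative names, not part of the patch.

#include <cstddef>

// Illustrative accounting only, assuming a 4-byte offset_type so that
// BLOCK_OVERHEAD == 8 (prev_ plus next_).
constexpr size_t kOffset = 4;             // one offset word
constexpr size_t kOverhead = 2 * kOffset; // prev_ + next_

// An allocated block also owns the prev_ word of its successor, so its
// usable bytes are outer_size - BLOCK_OVERHEAD + sizeof(prev_): a net
// overhead of one word, matching the new inner_size().
constexpr size_t inner_size(size_t outer) {
  return outer - kOverhead + kOffset;
}
static_assert(inner_size(32) == 28, "one word of overhead per allocated block");

// The sentinel occupies BLOCK_OVERHEAD bytes, but its prev_ word is handed
// back to the last real block whenever that block is allocated, so the net
// heap-wide cost of the sentinel is a single next_ word.
static_assert(kOverhead - kOffset == kOffset, "a single word per heap");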

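Finally, a sketch of how a caller might drive the new primitives on the free path; free_block is a hypothetical helper, not part of this commit or of the libc allocator API.

// Hypothetical free path, written against the patched Block interface.
template <typename BlockT>
void free_block(BlockT *block) {
  block->mark_free(); // Publishes the boundary tag into the next block.
  // Forward coalescing can always be attempted: thanks to the sentinel,
  // every real block has a next().
  block->merge_next();
  // Backward coalescing needs the boundary tag, which is valid exactly
  // when prev_free() returns non-null.
  if (BlockT *prev = block->prev_free())
    prev->merge_next();
}

The asymmetry is the point of the patch: forward coalescing needs no extra metadata, while backward coalescing pays for its boundary tag only while the previous block is actually free.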