Skip to content

Commit 4cfe551

Browse files
thestingerJason Evans
authored and
Jason Evans
committed
Add support for sized deallocation.
This adds a new `sdallocx` function to the external API, allowing the size to be passed by the caller. It avoids some extra reads in the thread cache fast path. In the case where stats are enabled, this avoids the work of calculating the size from the pointer. An assertion validates the size that's passed in, so enabling debugging will allow users of the API to debug cases where an incorrect size is passed in. The performance win for a contrived microbenchmark doing an allocation and immediately freeing it is ~10%. It may have a different impact on a real workload. Closes #28
1 parent c3f8650 commit 4cfe551

File tree

10 files changed

+201
-5
lines changed

10 files changed

+201
-5
lines changed

Makefile.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ TESTS_UNIT_AUX := $(srcroot)test/unit/prof_accum_a.c \
136136
$(srcroot)test/unit/prof_accum_b.c
137137
TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \
138138
$(srcroot)test/integration/allocated.c \
139+
$(srcroot)test/integration/sdallocx.c \
139140
$(srcroot)test/integration/mallocx.c \
140141
$(srcroot)test/integration/MALLOCX_ARENA.c \
141142
$(srcroot)test/integration/posix_memalign.c \

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,7 @@ AC_PROG_RANLIB
452452
AC_PATH_PROG([LD], [ld], [false], [$PATH])
453453
AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH])
454454

455-
public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size"
455+
public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx sdallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size"
456456

457457
dnl Check for allocator-related functions that should be wrapped.
458458
AC_CHECK_FUNC([memalign],

doc/jemalloc.xml.in

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
<refname>xallocx</refname>
3939
<refname>sallocx</refname>
4040
<refname>dallocx</refname>
41+
<refname>sdallocx</refname>
4142
<refname>nallocx</refname>
4243
<refname>mallctl</refname>
4344
<refname>mallctlnametomib</refname>
@@ -120,6 +121,12 @@
120121
<paramdef>void *<parameter>ptr</parameter></paramdef>
121122
<paramdef>int <parameter>flags</parameter></paramdef>
122123
</funcprototype>
124+
<funcprototype>
125+
<funcdef>void <function>sdallocx</function></funcdef>
126+
<paramdef>void *<parameter>ptr</parameter></paramdef>
127+
<paramdef>size_t <parameter>size</parameter></paramdef>
128+
<paramdef>int <parameter>flags</parameter></paramdef>
129+
</funcprototype>
123130
<funcprototype>
124131
<funcdef>size_t <function>nallocx</function></funcdef>
125132
<paramdef>size_t <parameter>size</parameter></paramdef>
@@ -228,7 +235,8 @@
228235
<function>rallocx<parameter/></function>,
229236
<function>xallocx<parameter/></function>,
230237
<function>sallocx<parameter/></function>,
231-
<function>dallocx<parameter/></function>, and
238+
<function>dallocx<parameter/></function>,
239+
<function>sdallocx<parameter/></function>, and
232240
<function>nallocx<parameter/></function> functions all have a
233241
<parameter>flags</parameter> argument that can be used to specify
234242
options. The functions only check the options that are contextually
@@ -312,6 +320,15 @@
312320
memory referenced by <parameter>ptr</parameter> to be made available for
313321
future allocations.</para>
314322

323+
<para>The <function>sdallocx<parameter/></function> function is an
324+
extension of <function>dallocx<parameter/></function> with a
325+
<parameter>size</parameter> parameter to allow the caller to pass in the
326+
allocation size as an optimization. The minimum valid input size is the
327+
original requested size of the allocation, and the maximum valid input
328+
size is the corresponding value returned by
329+
<function>nallocx<parameter/></function> or
330+
<function>sallocx<parameter/></function>.</para>
331+
315332
<para>The <function>nallocx<parameter/></function> function allocates no
316333
memory, but it performs the same size computation as the
317334
<function>mallocx<parameter/></function> function, and returns the real

include/jemalloc/internal/arena.h

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,7 @@ void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
488488
void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
489489
size_t arena_salloc(const void *ptr, bool demote);
490490
void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache);
491+
void arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache);
491492
#endif
492493

493494
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
@@ -1139,9 +1140,7 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache)
11391140
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
11401141
/* Small allocation. */
11411142
if (try_tcache && (tcache = tcache_get(false)) != NULL) {
1142-
size_t binind;
1143-
1144-
binind = arena_ptr_small_binind_get(ptr, mapbits);
1143+
size_t binind = arena_ptr_small_binind_get(ptr, mapbits);
11451144
tcache_dalloc_small(tcache, ptr, binind);
11461145
} else
11471146
arena_dalloc_small(chunk->arena, chunk, ptr, pageind);
@@ -1157,6 +1156,34 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache)
11571156
arena_dalloc_large(chunk->arena, chunk, ptr);
11581157
}
11591158
}
1159+
1160+
JEMALLOC_ALWAYS_INLINE void
1161+
arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache)
1162+
{
1163+
tcache_t *tcache;
1164+
1165+
assert(ptr != NULL);
1166+
assert(CHUNK_ADDR2BASE(ptr) != ptr);
1167+
1168+
if (size < PAGE) {
1169+
/* Small allocation. */
1170+
if (try_tcache && (tcache = tcache_get(false)) != NULL) {
1171+
size_t binind = small_size2bin(size);
1172+
tcache_dalloc_small(tcache, ptr, binind);
1173+
} else {
1174+
size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
1175+
arena_dalloc_small(chunk->arena, chunk, ptr, pageind);
1176+
}
1177+
} else {
1178+
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
1179+
1180+
if (try_tcache && size <= tcache_maxclass && (tcache =
1181+
tcache_get(false)) != NULL) {
1182+
tcache_dalloc_large(tcache, ptr, size);
1183+
} else
1184+
arena_dalloc_large(chunk->arena, chunk, ptr);
1185+
}
1186+
}
11601187
# endif /* JEMALLOC_ARENA_INLINE_C */
11611188
#endif
11621189

include/jemalloc/internal/jemalloc_internal.h.in

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -634,8 +634,10 @@ size_t ivsalloc(const void *ptr, bool demote);
634634
size_t u2rz(size_t usize);
635635
size_t p2rz(const void *ptr);
636636
void idalloct(void *ptr, bool try_tcache);
637+
void isdalloct(void *ptr, size_t size, bool try_tcache);
637638
void idalloc(void *ptr);
638639
void iqalloc(void *ptr, bool try_tcache);
640+
void isqalloc(void *ptr, size_t size, bool try_tcache);
639641
void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
640642
size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
641643
arena_t *arena);
@@ -787,6 +789,20 @@ idalloct(void *ptr, bool try_tcache)
787789
huge_dalloc(ptr);
788790
}
789791

792+
JEMALLOC_ALWAYS_INLINE void
793+
isdalloct(void *ptr, size_t size, bool try_tcache)
794+
{
795+
arena_chunk_t *chunk;
796+
797+
assert(ptr != NULL);
798+
799+
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
800+
if (chunk != ptr)
801+
arena_sdalloc(chunk, ptr, size, try_tcache);
802+
else
803+
huge_dalloc(ptr);
804+
}
805+
790806
JEMALLOC_ALWAYS_INLINE void
791807
idalloc(void *ptr)
792808
{
@@ -804,6 +820,16 @@ iqalloc(void *ptr, bool try_tcache)
804820
idalloct(ptr, try_tcache);
805821
}
806822

823+
JEMALLOC_ALWAYS_INLINE void
824+
isqalloc(void *ptr, size_t size, bool try_tcache)
825+
{
826+
827+
if (config_fill && opt_quarantine)
828+
quarantine(ptr);
829+
else
830+
idalloct(ptr, try_tcache);
831+
}
832+
807833
JEMALLOC_ALWAYS_INLINE void *
808834
iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
809835
size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,

include/jemalloc/internal/private_symbols.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ arena_ralloc_no_move
6161
arena_redzone_corruption
6262
arena_run_regind
6363
arena_salloc
64+
arena_sdalloc
6465
arena_stats_merge
6566
arena_tcache_fill_small
6667
arenas
@@ -228,7 +229,9 @@ iralloc
228229
iralloct
229230
iralloct_realign
230231
isalloc
232+
isdalloct
231233
isthreaded
234+
isqalloc
232235
ivsalloc
233236
ixalloc
234237
jemalloc_postfork_child

include/jemalloc/jemalloc_protos.h.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ JEMALLOC_EXPORT size_t @je_@xallocx(void *ptr, size_t size, size_t extra,
2525
JEMALLOC_EXPORT size_t @je_@sallocx(const void *ptr, int flags)
2626
JEMALLOC_ATTR(pure);
2727
JEMALLOC_EXPORT void @je_@dallocx(void *ptr, int flags);
28+
JEMALLOC_EXPORT void @je_@sdallocx(void *ptr, size_t size, int flags);
2829
JEMALLOC_EXPORT size_t @je_@nallocx(size_t size, int flags)
2930
JEMALLOC_ATTR(pure);
3031

src/jemalloc.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,6 +1223,24 @@ ifree(void *ptr, bool try_tcache)
12231223
JEMALLOC_VALGRIND_FREE(ptr, rzsize);
12241224
}
12251225

1226+
JEMALLOC_INLINE_C void
1227+
isfree(void *ptr, size_t usize, bool try_tcache)
1228+
{
1229+
UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
1230+
1231+
assert(ptr != NULL);
1232+
assert(malloc_initialized || IS_INITIALIZER);
1233+
1234+
if (config_prof && opt_prof)
1235+
prof_free(ptr, usize);
1236+
if (config_stats)
1237+
thread_allocated_tsd_get()->deallocated += usize;
1238+
if (config_valgrind && in_valgrind)
1239+
rzsize = p2rz(ptr);
1240+
isqalloc(ptr, usize, try_tcache);
1241+
JEMALLOC_VALGRIND_FREE(ptr, rzsize);
1242+
}
1243+
12261244
void *
12271245
je_realloc(void *ptr, size_t size)
12281246
{
@@ -1820,6 +1838,32 @@ je_dallocx(void *ptr, int flags)
18201838
ifree(ptr, try_tcache);
18211839
}
18221840

1841+
void
1842+
je_sdallocx(void *ptr, size_t size, int flags)
1843+
{
1844+
bool try_tcache;
1845+
1846+
assert(ptr != NULL);
1847+
assert(malloc_initialized || IS_INITIALIZER);
1848+
assert(size == isalloc(ptr, config_prof));
1849+
1850+
if ((flags & MALLOCX_LG_ALIGN_MASK) == 0)
1851+
size = s2u(size);
1852+
else
1853+
size = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags));
1854+
1855+
if ((flags & MALLOCX_ARENA_MASK) != 0) {
1856+
unsigned arena_ind = MALLOCX_ARENA_GET(flags);
1857+
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
1858+
try_tcache = (chunk == ptr || chunk->arena !=
1859+
arenas[arena_ind]);
1860+
} else
1861+
try_tcache = true;
1862+
1863+
UTRACE(ptr, 0, 0);
1864+
isfree(ptr, size, try_tcache);
1865+
}
1866+
18231867
size_t
18241868
je_nallocx(size_t size, int flags)
18251869
{

test/integration/sdallocx.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#include "test/jemalloc_test.h"
2+
3+
#define MAXALIGN (((size_t)1) << 25)
4+
#define NITER 4
5+
6+
TEST_BEGIN(test_basic)
7+
{
8+
void *ptr = mallocx(64, 0);
9+
sdallocx(ptr, 64, 0);
10+
}
11+
TEST_END
12+
13+
TEST_BEGIN(test_alignment_and_size)
14+
{
15+
size_t nsz, sz, alignment, total;
16+
unsigned i;
17+
void *ps[NITER];
18+
19+
for (i = 0; i < NITER; i++)
20+
ps[i] = NULL;
21+
22+
for (alignment = 8;
23+
alignment <= MAXALIGN;
24+
alignment <<= 1) {
25+
total = 0;
26+
for (sz = 1;
27+
sz < 3 * alignment && sz < (1U << 31);
28+
sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
29+
for (i = 0; i < NITER; i++) {
30+
nsz = nallocx(sz, MALLOCX_ALIGN(alignment) |
31+
MALLOCX_ZERO);
32+
ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) |
33+
MALLOCX_ZERO);
34+
total += nsz;
35+
if (total >= (MAXALIGN << 1))
36+
break;
37+
}
38+
for (i = 0; i < NITER; i++) {
39+
if (ps[i] != NULL) {
40+
sdallocx(ps[i], sz,
41+
MALLOCX_ALIGN(alignment));
42+
ps[i] = NULL;
43+
}
44+
}
45+
}
46+
}
47+
}
48+
TEST_END
49+
50+
int
51+
main(void)
52+
{
53+
54+
return (test(
55+
test_basic,
56+
test_alignment_and_size));
57+
}

test/stress/microbench.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,17 @@ malloc_dallocx(void)
7272
dallocx(p, 0);
7373
}
7474

75+
/*
 * Benchmark body: allocate one byte with malloc() and release it with
 * sdallocx(), reporting a test failure if the allocation fails.
 */
static void
malloc_sdallocx(void)
{
	void *mem;

	mem = malloc(1);
	if (mem != NULL) {
		sdallocx(mem, 1, 0);
	} else {
		test_fail("Unexpected malloc() failure");
	}
}
85+
7586
TEST_BEGIN(test_free_vs_dallocx)
7687
{
7788

@@ -80,6 +91,14 @@ TEST_BEGIN(test_free_vs_dallocx)
8091
}
8192
TEST_END
8293

94+
TEST_BEGIN(test_dallocx_vs_sdallocx)
95+
{
96+
97+
compare_funcs(10*1000*1000, 100*1000*1000, "dallocx", malloc_dallocx,
98+
"sdallocx", malloc_sdallocx);
99+
}
100+
TEST_END
101+
83102
static void
84103
malloc_mus_free(void)
85104
{
@@ -135,6 +154,7 @@ main(void)
135154
return (test(
136155
test_malloc_vs_mallocx,
137156
test_free_vs_dallocx,
157+
test_dallocx_vs_sdallocx,
138158
test_mus_vs_sallocx,
139159
test_sallocx_vs_nallocx));
140160
}

0 commit comments

Comments
 (0)