Skip to content

Commit bd94051

Browse files
committed
Improve performance of WeakReference/WeakMap.
Shift pointers by ZEND_MM_ALIGNMENT_LOG2 to avoid the noticeable performance degradation caused by hash table collisions in `EG(weakrefs)` and `zend_weakmap->ht`. On 64-bit platforms, pointers are usually aligned to at least 8 bytes, so only one in 8 hash buckets was actually getting used. (With the metadata needed to track allocations, alignment might be at least 16 bytes in practice.) Address review comments, add optimization. Make the pointer-to-key conversion helpers public for any extensions that need to work with EG(weakrefs) for instrumentation, debugging, etc. (e.g. zend_test). PHP 8.1 and below would use the raw pointer value as a hash key instead.
1 parent 8b86af4 commit bd94051

File tree

3 files changed

+89
-48
lines changed

3 files changed

+89
-48
lines changed

Zend/zend_weakrefs.c

Lines changed: 64 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,16 @@ typedef struct _zend_weakmap_iterator {
3535
uint32_t ht_iter;
3636
} zend_weakmap_iterator;
3737

38-
/* The EG(weakrefs) ht is a map from object address a tagged pointer, that may be one of
39-
* zend_weakref*, zend_weakmap* or HashTable*. */
38+
/* EG(weakrefs) is a map from a key corresponding to a zend_object pointer to all the WeakReference and/or WeakMap entries relating to that pointer.
39+
*
40+
* 1. For a single WeakReference,
41+
* the HashTable's corresponding value's tag is a ZEND_WEAKREF_TAG_REF and the pointer is a singleton WeakReference instance (zend_weakref *) for that zend_object pointer (from WeakReference::create()).
42+
* 2. For a single WeakMap, the HashTable's corresponding value's tag is a ZEND_WEAKREF_TAG_MAP and the pointer is a WeakMap instance (zend_weakmap *).
43+
* 3. For multiple values associated with the same zend_object pointer, the HashTable entry's tag is a ZEND_WEAKREF_TAG_HT with a HashTable mapping
44+
* tagged pointers of at most 1 WeakReference and 1 or more WeakMaps to the same tagged pointer.
45+
*
46+
* ZEND_MM_ALIGNMENT_LOG2 is at least 2 on supported architectures (pointers to the objects in question are aligned to 4 bytes (1<<2) even on 32-bit systems),
47+
* i.e. the two least significant bits of the pointer can be used as a tag (ZEND_WEAKREF_TAG_*). */
4048
#define ZEND_WEAKREF_TAG_REF 0
4149
#define ZEND_WEAKREF_TAG_MAP 1
4250
#define ZEND_WEAKREF_TAG_HT 2
@@ -56,38 +64,40 @@ static zend_object_handlers zend_weakmap_handlers;
5664
#define zend_weakmap_fetch(z) zend_weakmap_from(Z_OBJ_P(z))
5765

5866
static inline void zend_weakref_unref_single(
59-
void *ptr, uintptr_t tag, zend_ulong obj_addr)
67+
void *ptr, uintptr_t tag, zend_object *object)
6068
{
6169
if (tag == ZEND_WEAKREF_TAG_REF) {
70+
/* Unreferencing WeakReference (at ptr) singleton that pointed to object. */
6271
zend_weakref *wr = ptr;
6372
wr->referent = NULL;
6473
} else {
74+
/* unreferencing WeakMap entry (at ptr) with a key of object. */
6575
ZEND_ASSERT(tag == ZEND_WEAKREF_TAG_MAP);
66-
zend_hash_index_del((HashTable *) ptr, obj_addr);
76+
zend_hash_index_del((HashTable *) ptr, zend_object_ptr_to_weakref_key(object));
6777
}
6878
}
6979

70-
static void zend_weakref_unref(zend_ulong obj_addr, void *tagged_ptr) {
80+
static void zend_weakref_unref(zend_object *object, void *tagged_ptr) {
7181
void *ptr = ZEND_WEAKREF_GET_PTR(tagged_ptr);
7282
uintptr_t tag = ZEND_WEAKREF_GET_TAG(tagged_ptr);
7383
if (tag == ZEND_WEAKREF_TAG_HT) {
7484
HashTable *ht = ptr;
7585
ZEND_HASH_MAP_FOREACH_PTR(ht, tagged_ptr) {
7686
zend_weakref_unref_single(
77-
ZEND_WEAKREF_GET_PTR(tagged_ptr), ZEND_WEAKREF_GET_TAG(tagged_ptr), obj_addr);
87+
ZEND_WEAKREF_GET_PTR(tagged_ptr), ZEND_WEAKREF_GET_TAG(tagged_ptr), object);
7888
} ZEND_HASH_FOREACH_END();
7989
zend_hash_destroy(ht);
8090
FREE_HASHTABLE(ht);
8191
} else {
82-
zend_weakref_unref_single(ptr, tag, obj_addr);
92+
zend_weakref_unref_single(ptr, tag, object);
8393
}
8494
}
8595

8696
static void zend_weakref_register(zend_object *object, void *payload) {
8797
GC_ADD_FLAGS(object, IS_OBJ_WEAKLY_REFERENCED);
8898

89-
zend_ulong obj_addr = (zend_ulong) object;
90-
zval *zv = zend_hash_index_lookup(&EG(weakrefs), obj_addr);
99+
zend_ulong obj_key = zend_object_ptr_to_weakref_key(object);
100+
zval *zv = zend_hash_index_lookup(&EG(weakrefs), obj_key);
91101
if (Z_TYPE_P(zv) == IS_NULL) {
92102
ZVAL_PTR(zv, payload);
93103
return;
@@ -105,25 +115,28 @@ static void zend_weakref_register(zend_object *object, void *payload) {
105115
zend_hash_init(ht, 0, NULL, NULL, 0);
106116
zend_hash_index_add_new_ptr(ht, (zend_ulong) tagged_ptr, tagged_ptr);
107117
zend_hash_index_add_new_ptr(ht, (zend_ulong) payload, payload);
108-
zend_hash_index_update_ptr(
109-
&EG(weakrefs), obj_addr, ZEND_WEAKREF_ENCODE(ht, ZEND_WEAKREF_TAG_HT));
118+
/* Replace the single WeakMap or WeakReference entry in EG(weakrefs) with a HashTable with 2 entries in place. */
119+
ZVAL_PTR(zv, ZEND_WEAKREF_ENCODE(ht, ZEND_WEAKREF_TAG_HT));
110120
}
111121

112122
static void zend_weakref_unregister(zend_object *object, void *payload, bool weakref_free) {
113-
zend_ulong obj_addr = (zend_ulong) object;
114-
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), obj_addr);
123+
zend_ulong obj_key = zend_object_ptr_to_weakref_key(object);
124+
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), obj_key);
115125
ZEND_ASSERT(tagged_ptr && "Weakref not registered?");
116126

117127
void *ptr = ZEND_WEAKREF_GET_PTR(tagged_ptr);
118128
uintptr_t tag = ZEND_WEAKREF_GET_TAG(tagged_ptr);
119129
if (tag != ZEND_WEAKREF_TAG_HT) {
120130
ZEND_ASSERT(tagged_ptr == payload);
121-
zend_hash_index_del(&EG(weakrefs), obj_addr);
131+
zend_hash_index_del(&EG(weakrefs), obj_key);
122132
GC_DEL_FLAGS(object, IS_OBJ_WEAKLY_REFERENCED);
123133

124134
/* Do this last, as it may destroy the object. */
125135
if (weakref_free) {
126-
zend_weakref_unref_single(ptr, tag, obj_addr);
136+
zend_weakref_unref_single(ptr, tag, object);
137+
} else {
138+
/* The optimization of skipping unref is only used in the destructor of WeakMap */
139+
ZEND_ASSERT(ZEND_WEAKREF_GET_TAG(payload) == ZEND_WEAKREF_TAG_MAP);
127140
}
128141
return;
129142
}
@@ -139,26 +152,29 @@ static void zend_weakref_unregister(zend_object *object, void *payload, bool wea
139152
GC_DEL_FLAGS(object, IS_OBJ_WEAKLY_REFERENCED);
140153
zend_hash_destroy(ht);
141154
FREE_HASHTABLE(ht);
142-
zend_hash_index_del(&EG(weakrefs), obj_addr);
155+
zend_hash_index_del(&EG(weakrefs), obj_key);
143156
}
144157

145158
/* Do this last, as it may destroy the object. */
146159
if (weakref_free) {
147160
zend_weakref_unref_single(
148-
ZEND_WEAKREF_GET_PTR(payload), ZEND_WEAKREF_GET_TAG(payload), obj_addr);
161+
ZEND_WEAKREF_GET_PTR(payload), ZEND_WEAKREF_GET_TAG(payload), object);
162+
} else {
163+
/* The optimization of skipping unref is only used in the destructor of WeakMap */
164+
ZEND_ASSERT(ZEND_WEAKREF_GET_TAG(payload) == ZEND_WEAKREF_TAG_MAP);
149165
}
150166
}
151167

152168
ZEND_API zval *zend_weakrefs_hash_add(HashTable *ht, zend_object *key, zval *pData) {
153-
zval *zv = zend_hash_index_add(ht, (zend_ulong) key, pData);
169+
zval *zv = zend_hash_index_add(ht, zend_object_ptr_to_weakref_key(key), pData);
154170
if (zv) {
155171
zend_weakref_register(key, ZEND_WEAKREF_ENCODE(ht, ZEND_WEAKREF_TAG_MAP));
156172
}
157173
return zv;
158174
}
159175

160176
ZEND_API zend_result zend_weakrefs_hash_del(HashTable *ht, zend_object *key) {
161-
zval *zv = zend_hash_index_find(ht, (zend_ulong) key);
177+
zval *zv = zend_hash_index_find(ht, zend_object_ptr_to_weakref_key(key));
162178
if (zv) {
163179
zend_weakref_unregister(key, ZEND_WEAKREF_ENCODE(ht, ZEND_WEAKREF_TAG_MAP), 1);
164180
return SUCCESS;
@@ -170,17 +186,19 @@ void zend_weakrefs_init(void) {
170186
zend_hash_init(&EG(weakrefs), 8, NULL, NULL, 0);
171187
}
172188

189+
/* This is called when the object is garbage collected
190+
* to remove all WeakReference and WeakMap entries weakly referencing that object. */
173191
void zend_weakrefs_notify(zend_object *object) {
174192
/* Annoyingly we can't use the HT destructor here, because we need access to the key (which
175193
* is derived from the object address), which is not provided to the dtor. */
176-
zend_ulong obj_addr = (zend_ulong) object;
177-
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), obj_addr);
194+
const zend_ulong obj_key = zend_object_ptr_to_weakref_key(object);
195+
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), obj_key);
178196
#if ZEND_DEBUG
179197
ZEND_ASSERT(tagged_ptr && "Tracking of the IS_OBJ_WEAKLY_REFERENCE flag should be precise");
180198
#endif
181199
if (tagged_ptr) {
182-
zend_weakref_unref(obj_addr, tagged_ptr);
183-
zend_hash_index_del(&EG(weakrefs), obj_addr);
200+
zend_weakref_unref(object, tagged_ptr);
201+
zend_hash_index_del(&EG(weakrefs), obj_key);
184202
}
185203
}
186204

@@ -199,7 +217,7 @@ static zend_object* zend_weakref_new(zend_class_entry *ce) {
199217
}
200218

201219
static zend_always_inline bool zend_weakref_find(zend_object *referent, zval *return_value) {
202-
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), (zend_ulong) referent);
220+
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), zend_object_ptr_to_weakref_key(referent));
203221
if (!tagged_ptr) {
204222
return 0;
205223
}
@@ -295,13 +313,13 @@ static zend_object *zend_weakmap_create_object(zend_class_entry *ce)
295313
static void zend_weakmap_free_obj(zend_object *object)
296314
{
297315
zend_weakmap *wm = zend_weakmap_from(object);
298-
zend_ulong obj_addr;
299-
ZEND_HASH_MAP_FOREACH_NUM_KEY(&wm->ht, obj_addr) {
316+
zend_ulong obj_key;
317+
ZEND_HASH_MAP_FOREACH_NUM_KEY(&wm->ht, obj_key) {
300318
/* Optimization: Don't call zend_weakref_unref_single to free individual entries from wm->ht when unregistering (which would do a hash table lookup, call zend_hash_index_del, and skip over any bucket collisions).
301319
* Let freeing the corresponding values for WeakMap entries be done in zend_hash_destroy, freeing objects sequentially.
302320
* The performance difference is notable for larger WeakMaps with worse cache locality. */
303321
zend_weakref_unregister(
304-
(zend_object *) obj_addr, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP), 0);
322+
zend_weakref_key_to_zend_object_ptr(obj_key), ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP), 0);
305323
} ZEND_HASH_FOREACH_END();
306324
zend_hash_destroy(&wm->ht);
307325
zend_object_std_dtor(&wm->std);
@@ -320,12 +338,12 @@ static zval *zend_weakmap_read_dimension(zend_object *object, zval *offset, int
320338
}
321339

322340
zend_weakmap *wm = zend_weakmap_from(object);
323-
zend_object *obj_key = Z_OBJ_P(offset);
324-
zval *zv = zend_hash_index_find(&wm->ht, (zend_ulong) obj_key);
341+
zend_object *obj_addr = Z_OBJ_P(offset);
342+
zval *zv = zend_hash_index_find(&wm->ht, zend_object_ptr_to_weakref_key(obj_addr));
325343
if (zv == NULL) {
326344
if (type != BP_VAR_IS) {
327345
zend_throw_error(NULL,
328-
"Object %s#%d not contained in WeakMap", ZSTR_VAL(obj_key->ce->name), obj_key->handle);
346+
"Object %s#%d not contained in WeakMap", ZSTR_VAL(obj_addr->ce->name), obj_addr->handle);
329347
return NULL;
330348
}
331349
return NULL;
@@ -350,10 +368,11 @@ static void zend_weakmap_write_dimension(zend_object *object, zval *offset, zval
350368
}
351369

352370
zend_weakmap *wm = zend_weakmap_from(object);
353-
zend_object *obj_key = Z_OBJ_P(offset);
371+
zend_object *obj_addr = Z_OBJ_P(offset);
372+
zend_ulong obj_key = zend_object_ptr_to_weakref_key(obj_addr);
354373
Z_TRY_ADDREF_P(value);
355374

356-
zval *zv = zend_hash_index_find(&wm->ht, (zend_ulong) obj_key);
375+
zval *zv = zend_hash_index_find(&wm->ht, obj_key);
357376
if (zv) {
358377
/* Because the destructors can have side effects such as resizing or rehashing the WeakMap storage,
359378
* free the zval only after overwriting the original value. */
@@ -364,8 +383,8 @@ static void zend_weakmap_write_dimension(zend_object *object, zval *offset, zval
364383
return;
365384
}
366385

367-
zend_weakref_register(obj_key, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP));
368-
zend_hash_index_add_new(&wm->ht, (zend_ulong) obj_key, value);
386+
zend_weakref_register(obj_addr, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP));
387+
zend_hash_index_add_new(&wm->ht, obj_key, value);
369388
}
370389

371390
/* int return and check_empty due to Object Handler API */
@@ -377,7 +396,7 @@ static int zend_weakmap_has_dimension(zend_object *object, zval *offset, int che
377396
}
378397

379398
zend_weakmap *wm = zend_weakmap_from(object);
380-
zval *zv = zend_hash_index_find(&wm->ht, (zend_ulong) Z_OBJ_P(offset));
399+
zval *zv = zend_hash_index_find(&wm->ht, zend_object_ptr_to_weakref_key(Z_OBJ_P(offset)));
381400
if (!zv) {
382401
return 0;
383402
}
@@ -396,13 +415,13 @@ static void zend_weakmap_unset_dimension(zend_object *object, zval *offset)
396415
}
397416

398417
zend_weakmap *wm = zend_weakmap_from(object);
399-
zend_object *obj_key = Z_OBJ_P(offset);
400-
if (!zend_hash_index_exists(&wm->ht, (zend_ulong) Z_OBJ_P(offset))) {
418+
zend_object *obj_addr = Z_OBJ_P(offset);
419+
if (!zend_hash_index_exists(&wm->ht, zend_object_ptr_to_weakref_key(obj_addr))) {
401420
/* Object not in WeakMap, do nothing. */
402421
return;
403422
}
404423

405-
zend_weakref_unregister(obj_key, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP), 1);
424+
zend_weakref_unregister(obj_addr, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP), 1);
406425
}
407426

408427
static int zend_weakmap_count_elements(zend_object *object, zend_long *count)
@@ -423,10 +442,10 @@ static HashTable *zend_weakmap_get_properties_for(zend_object *object, zend_prop
423442
ALLOC_HASHTABLE(ht);
424443
zend_hash_init(ht, zend_hash_num_elements(&wm->ht), NULL, ZVAL_PTR_DTOR, 0);
425444

426-
zend_ulong obj_addr;
445+
zend_ulong obj_key;
427446
zval *val;
428-
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(&wm->ht, obj_addr, val) {
429-
zend_object *obj = (zend_object*)obj_addr;
447+
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(&wm->ht, obj_key, val) {
448+
zend_object *obj = zend_weakref_key_to_zend_object_ptr(obj_key);
430449
zval pair;
431450
array_init(&pair);
432451

@@ -460,11 +479,11 @@ static zend_object *zend_weakmap_clone_obj(zend_object *old_object)
460479
zend_weakmap *new_wm = zend_weakmap_from(new_object);
461480
zend_hash_copy(&new_wm->ht, &old_wm->ht, NULL);
462481

463-
zend_ulong obj_addr;
482+
zend_ulong obj_key;
464483
zval *val;
465-
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(&new_wm->ht, obj_addr, val) {
484+
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(&new_wm->ht, obj_key, val) {
466485
zend_weakref_register(
467-
(zend_object *) obj_addr, ZEND_WEAKREF_ENCODE(new_wm, ZEND_WEAKREF_TAG_MAP));
486+
zend_weakref_key_to_zend_object_ptr(obj_key), ZEND_WEAKREF_ENCODE(new_wm, ZEND_WEAKREF_TAG_MAP));
468487
zval_add_ref(val);
469488
} ZEND_HASH_FOREACH_END();
470489
return new_object;
@@ -511,7 +530,7 @@ static void zend_weakmap_iterator_get_current_key(zend_object_iterator *obj_iter
511530
ZEND_ASSERT(0 && "Must have integer key");
512531
}
513532

514-
ZVAL_OBJ_COPY(key, (zend_object *) num_key);
533+
ZVAL_OBJ_COPY(key, zend_weakref_key_to_zend_object_ptr(num_key));
515534
}
516535

517536
static void zend_weakmap_iterator_move_forward(zend_object_iterator *obj_iter)

Zend/zend_weakrefs.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#ifndef ZEND_WEAKREFS_H
1818
#define ZEND_WEAKREFS_H
1919

20+
#include "zend_alloc.h"
21+
2022
BEGIN_EXTERN_C()
2123

2224
extern ZEND_API zend_class_entry *zend_ce_weakref;
@@ -40,6 +42,26 @@ static zend_always_inline void *zend_weakrefs_hash_add_ptr(HashTable *ht, zend_o
4042
}
4143
}
4244

45+
/* Because PHP uses the raw integer key as its hash value, using raw pointers as keys will lead to hash collisions.
46+
* We have a guarantee that the lowest ZEND_MM_ALIGNMENT_LOG2 bits of a pointer are zero.
47+
*
48+
* E.g. On most 64-bit platforms, pointers are aligned to 8 bytes, so the least significant 3 bits are always 0 and can be discarded.
49+
*
50+
* NOTE: This function is only used for EG(weakrefs) and zend_weakmap->ht.
51+
* It is not used for the HashTable instances associated with ZEND_WEAKREF_TAG_HT tags (created in zend_weakref_register, which uses ZEND_WEAKREF_ENCODE instead).
52+
* The ZEND_WEAKREF_TAG_HT instances are used to disambiguate between multiple weak references to the same zend_object.
53+
*/
54+
static zend_always_inline zend_ulong zend_object_ptr_to_weakref_key(const zend_object *object)
55+
{
56+
ZEND_ASSERT(((uintptr_t)object) % ZEND_MM_ALIGNMENT == 0);
57+
return ((uintptr_t) object) >> ZEND_MM_ALIGNMENT_LOG2;
58+
}
59+
60+
static zend_always_inline zend_object *zend_weakref_key_to_zend_object_ptr(zend_ulong key)
61+
{
62+
return (zend_object *) (((uintptr_t) key) << ZEND_MM_ALIGNMENT_LOG2);
63+
}
64+
4365
END_EXTERN_C()
4466

4567
#endif

ext/zend_test/test.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -509,9 +509,9 @@ PHP_RINIT_FUNCTION(zend_test)
509509

510510
PHP_RSHUTDOWN_FUNCTION(zend_test)
511511
{
512-
zend_ulong objptr;
513-
ZEND_HASH_FOREACH_NUM_KEY(&ZT_G(global_weakmap), objptr) {
514-
zend_weakrefs_hash_del(&ZT_G(global_weakmap), (zend_object *)(uintptr_t)objptr);
512+
zend_ulong obj_key;
513+
ZEND_HASH_FOREACH_NUM_KEY(&ZT_G(global_weakmap), obj_key) {
514+
zend_weakrefs_hash_del(&ZT_G(global_weakmap), zend_weakref_key_to_zend_object_ptr(obj_key));
515515
} ZEND_HASH_FOREACH_END();
516516
zend_hash_destroy(&ZT_G(global_weakmap));
517517
return SUCCESS;

0 commit comments

Comments
 (0)