Skip to content

Commit bc34dee

Browse files
joannekoong authored and anakryiko committed
bpf: Dynptr support for ring buffers
Currently, our only way of writing dynamically-sized data into a ring buffer is through bpf_ringbuf_output but this incurs an extra memcpy cost. bpf_ringbuf_reserve + bpf_ringbuf_commit avoids this extra memcpy, but it can only safely support reservation sizes that are statically known since the verifier cannot guarantee that the bpf program won’t access memory outside the reserved space. The bpf_dynptr abstraction allows for dynamically-sized ring buffer reservations without the extra memcpy. There are 3 new APIs: long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr); void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags); void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags); These closely follow the functionalities of the original ringbuf APIs. For example, all ringbuffer dynptrs that have been reserved must be either submitted or discarded before the program exits. Signed-off-by: Joanne Koong <[email protected]> Signed-off-by: Andrii Nakryiko <[email protected]> Acked-by: Andrii Nakryiko <[email protected]> Acked-by: David Vernet <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent 263ae15 commit bc34dee

File tree

7 files changed

+223
-8
lines changed

7 files changed

+223
-8
lines changed

include/linux/bpf.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,11 +395,14 @@ enum bpf_type_flag {
395395
/* DYNPTR points to memory local to the bpf program. */
396396
DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS),
397397

398+
/* DYNPTR points to a ringbuf record. */
399+
DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS),
400+
398401
__BPF_TYPE_FLAG_MAX,
399402
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
400403
};
401404

402-
#define DYNPTR_TYPE_FLAG_MASK DYNPTR_TYPE_LOCAL
405+
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF)
403406

404407
/* Max number of base types. */
405408
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -2231,6 +2234,9 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto;
22312234
extern const struct bpf_func_proto bpf_ringbuf_submit_proto;
22322235
extern const struct bpf_func_proto bpf_ringbuf_discard_proto;
22332236
extern const struct bpf_func_proto bpf_ringbuf_query_proto;
2237+
extern const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto;
2238+
extern const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto;
2239+
extern const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto;
22342240
extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto;
22352241
extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
22362242
extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
@@ -2402,6 +2408,13 @@ enum bpf_dynptr_type {
24022408
BPF_DYNPTR_TYPE_INVALID,
24032409
/* Points to memory that is local to the bpf program */
24042410
BPF_DYNPTR_TYPE_LOCAL,
2411+
/* Underlying data is a ringbuf record */
2412+
BPF_DYNPTR_TYPE_RINGBUF,
24052413
};
24062414

2415+
void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
2416+
enum bpf_dynptr_type type, u32 offset, u32 size);
2417+
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
2418+
int bpf_dynptr_check_size(u32 size);
2419+
24072420
#endif /* _LINUX_BPF_H */

include/linux/bpf_verifier.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ struct bpf_reg_state {
100100
* for the purpose of tracking that it's freed.
101101
* For PTR_TO_SOCKET this is used to share which pointers retain the
102102
* same reference to the socket, to determine proper reference freeing.
103+
* For stack slots that are dynptrs, this is used to track references to
104+
* the dynptr to determine proper reference freeing.
103105
*/
104106
u32 id;
105107
/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned

include/uapi/linux/bpf.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5189,6 +5189,38 @@ union bpf_attr {
51895189
* Return
51905190
* 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE,
51915191
* -EINVAL if flags is not 0.
5192+
*
5193+
* long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr)
5194+
* Description
5195+
* Reserve *size* bytes of payload in a ring buffer *ringbuf*
5196+
* through the dynptr interface. *flags* must be 0.
5197+
*
5198+
* Please note that a corresponding bpf_ringbuf_submit_dynptr or
5199+
* bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the
5200+
* reservation fails. This is enforced by the verifier.
5201+
* Return
5202+
* 0 on success, or a negative error in case of failure.
5203+
*
5204+
* void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags)
5205+
* Description
5206+
* Submit reserved ring buffer sample, pointed to by *ptr*,
5207+
* through the dynptr interface. This is a no-op if the dynptr is
5208+
* invalid/null.
5209+
*
5210+
* For more information on *flags*, please see
5211+
* 'bpf_ringbuf_submit'.
5212+
* Return
5213+
* Nothing. Always succeeds.
5214+
*
5215+
* void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags)
5216+
* Description
5217+
* Discard reserved ring buffer sample through the dynptr
5218+
* interface. This is a no-op if the dynptr is invalid/null.
5219+
*
5220+
* For more information on *flags*, please see
5221+
* 'bpf_ringbuf_discard'.
5222+
* Return
5223+
* Nothing. Always succeeds.
51925224
*/
51935225
#define __BPF_FUNC_MAPPER(FN) \
51945226
FN(unspec), \
@@ -5389,6 +5421,9 @@ union bpf_attr {
53895421
FN(map_lookup_percpu_elem), \
53905422
FN(skc_to_mptcp_sock), \
53915423
FN(dynptr_from_mem), \
5424+
FN(ringbuf_reserve_dynptr), \
5425+
FN(ringbuf_submit_dynptr), \
5426+
FN(ringbuf_discard_dynptr), \
53925427
/* */
53935428

53945429
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

kernel/bpf/helpers.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1423,21 +1423,21 @@ static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_typ
14231423
ptr->size |= type << DYNPTR_TYPE_SHIFT;
14241424
}
14251425

1426-
static int bpf_dynptr_check_size(u32 size)
1426+
int bpf_dynptr_check_size(u32 size)
14271427
{
14281428
return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
14291429
}
14301430

1431-
static void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
1432-
enum bpf_dynptr_type type, u32 offset, u32 size)
1431+
void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
1432+
enum bpf_dynptr_type type, u32 offset, u32 size)
14331433
{
14341434
ptr->data = data;
14351435
ptr->offset = offset;
14361436
ptr->size = size;
14371437
bpf_dynptr_set_type(ptr, type);
14381438
}
14391439

1440-
static void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
1440+
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
14411441
{
14421442
memset(ptr, 0, sizeof(*ptr));
14431443
}
@@ -1523,6 +1523,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
15231523
return &bpf_ringbuf_discard_proto;
15241524
case BPF_FUNC_ringbuf_query:
15251525
return &bpf_ringbuf_query_proto;
1526+
case BPF_FUNC_ringbuf_reserve_dynptr:
1527+
return &bpf_ringbuf_reserve_dynptr_proto;
1528+
case BPF_FUNC_ringbuf_submit_dynptr:
1529+
return &bpf_ringbuf_submit_dynptr_proto;
1530+
case BPF_FUNC_ringbuf_discard_dynptr:
1531+
return &bpf_ringbuf_discard_dynptr_proto;
15261532
case BPF_FUNC_for_each_map_elem:
15271533
return &bpf_for_each_map_elem_proto;
15281534
case BPF_FUNC_loop:

kernel/bpf/ringbuf.c

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,3 +475,81 @@ const struct bpf_func_proto bpf_ringbuf_query_proto = {
475475
.arg1_type = ARG_CONST_MAP_PTR,
476476
.arg2_type = ARG_ANYTHING,
477477
};
478+
479+
/*
 * bpf_ringbuf_reserve_dynptr - reserve a ring buffer record and expose it
 * through a dynptr.
 *
 * @map:   ring buffer map; converted to its containing struct
 *         bpf_ringbuf_map via container_of()
 * @size:  number of payload bytes to reserve
 * @flags: must be 0
 * @ptr:   dynptr to fill in; it is set to null on every failure path
 *
 * Returns 0 on success, -EINVAL if @flags is non-zero or the reservation
 * returns NULL, or the error from bpf_dynptr_check_size() if @size is
 * rejected.
 */
BPF_CALL_4(bpf_ringbuf_reserve_dynptr, struct bpf_map *, map, u32, size, u64, flags,
480+
struct bpf_dynptr_kern *, ptr)
481+
{
482+
struct bpf_ringbuf_map *rb_map;
483+
void *sample;
484+
int err;
485+
486+
/* No flags are accepted; null the dynptr before bailing out. */
if (unlikely(flags)) {
487+
bpf_dynptr_set_null(ptr);
488+
return -EINVAL;
489+
}
490+
491+
/* Reject sizes the dynptr cannot represent before touching the ringbuf. */
err = bpf_dynptr_check_size(size);
492+
if (err) {
493+
bpf_dynptr_set_null(ptr);
494+
return err;
495+
}
496+
497+
rb_map = container_of(map, struct bpf_ringbuf_map, map);
498+
499+
sample = __bpf_ringbuf_reserve(rb_map->rb, size);
500+
/* A NULL sample means the reservation failed; report it as -EINVAL. */
if (!sample) {
501+
bpf_dynptr_set_null(ptr);
502+
return -EINVAL;
503+
}
504+
505+
/* The dynptr covers the whole reserved record, starting at offset 0. */
bpf_dynptr_init(ptr, sample, BPF_DYNPTR_TYPE_RINGBUF, 0, size);
506+
507+
return 0;
508+
}
509+
510+
/*
 * Helper prototype for bpf_ringbuf_reserve_dynptr(): integer return value,
 * a constant map pointer as arg1, unconstrained size and flags as arg2/arg3,
 * and a ringbuf-typed dynptr as arg4.
 */
const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto = {
511+
.func = bpf_ringbuf_reserve_dynptr,
512+
.ret_type = RET_INTEGER,
513+
.arg1_type = ARG_CONST_MAP_PTR,
514+
.arg2_type = ARG_ANYTHING,
515+
.arg3_type = ARG_ANYTHING,
516+
/* NOTE(review): MEM_UNINIT presumably means the helper initializes the dynptr - confirm. */
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT,
517+
};
518+
519+
/*
 * bpf_ringbuf_submit_dynptr - submit the ring buffer record behind a dynptr.
 *
 * @ptr:   dynptr whose data pointer refers to the reserved record
 * @flags: passed through unchanged to bpf_ringbuf_commit()
 *
 * Does nothing if @ptr has no data. Otherwise commits the record with
 * discard == false and then nulls the dynptr. Always returns 0.
 */
BPF_CALL_2(bpf_ringbuf_submit_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags)
520+
{
521+
/* A dynptr with no data has nothing to submit. */
if (!ptr->data)
522+
return 0;
523+
524+
bpf_ringbuf_commit(ptr->data, flags, false /* discard */);
525+
526+
/* Clear the dynptr so it no longer refers to the committed record. */
bpf_dynptr_set_null(ptr);
527+
528+
return 0;
529+
}
530+
531+
/*
 * Helper prototype for bpf_ringbuf_submit_dynptr(): no return value, a
 * ringbuf-typed dynptr flagged OBJ_RELEASE as arg1, and unconstrained
 * flags as arg2.
 */
const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto = {
532+
.func = bpf_ringbuf_submit_dynptr,
533+
.ret_type = RET_VOID,
534+
.arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE,
535+
.arg2_type = ARG_ANYTHING,
536+
};
537+
538+
/*
 * bpf_ringbuf_discard_dynptr - discard the ring buffer record behind a dynptr.
 *
 * @ptr:   dynptr whose data pointer refers to the reserved record
 * @flags: passed through unchanged to bpf_ringbuf_commit()
 *
 * Does nothing if @ptr has no data. Otherwise commits the record with
 * discard == true and then nulls the dynptr. Always returns 0.
 */
BPF_CALL_2(bpf_ringbuf_discard_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags)
539+
{
540+
/* A dynptr with no data has nothing to discard. */
if (!ptr->data)
541+
return 0;
542+
543+
bpf_ringbuf_commit(ptr->data, flags, true /* discard */);
544+
545+
/* Clear the dynptr so it no longer refers to the discarded record. */
bpf_dynptr_set_null(ptr);
546+
547+
return 0;
548+
}
549+
550+
/*
 * Helper prototype for bpf_ringbuf_discard_dynptr(): no return value, a
 * ringbuf-typed dynptr flagged OBJ_RELEASE as arg1, and unconstrained
 * flags as arg2.
 */
const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto = {
551+
.func = bpf_ringbuf_discard_dynptr,
552+
.ret_type = RET_VOID,
553+
.arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE,
554+
.arg2_type = ARG_ANYTHING,
555+
};

kernel/bpf/verifier.c

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,9 @@ struct bpf_verifier_stack_elem {
187187
POISON_POINTER_DELTA))
188188
#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
189189

190+
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
191+
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
192+
190193
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
191194
{
192195
return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
@@ -673,17 +676,24 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
673676
switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
674677
case DYNPTR_TYPE_LOCAL:
675678
return BPF_DYNPTR_TYPE_LOCAL;
679+
case DYNPTR_TYPE_RINGBUF:
680+
return BPF_DYNPTR_TYPE_RINGBUF;
676681
default:
677682
return BPF_DYNPTR_TYPE_INVALID;
678683
}
679684
}
680685

686+
/*
 * Report whether a dynptr of the given type is treated as reference-counted.
 * Only BPF_DYNPTR_TYPE_RINGBUF yields true; every other type yields false.
 */
static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
687+
{
688+
return type == BPF_DYNPTR_TYPE_RINGBUF;
689+
}
690+
681691
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
682692
enum bpf_arg_type arg_type, int insn_idx)
683693
{
684694
struct bpf_func_state *state = func(env, reg);
685695
enum bpf_dynptr_type type;
686-
int spi, i;
696+
int spi, i, id;
687697

688698
spi = get_spi(reg->off);
689699

@@ -703,6 +713,16 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
703713
state->stack[spi].spilled_ptr.dynptr.type = type;
704714
state->stack[spi - 1].spilled_ptr.dynptr.type = type;
705715

716+
if (dynptr_type_refcounted(type)) {
717+
/* The id is used to track proper releasing */
718+
id = acquire_reference_state(env, insn_idx);
719+
if (id < 0)
720+
return id;
721+
722+
state->stack[spi].spilled_ptr.id = id;
723+
state->stack[spi - 1].spilled_ptr.id = id;
724+
}
725+
706726
return 0;
707727
}
708728

@@ -721,6 +741,13 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
721741
state->stack[spi - 1].slot_type[i] = STACK_INVALID;
722742
}
723743

744+
/* Invalidate any slices associated with this dynptr */
745+
if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
746+
release_reference(env, state->stack[spi].spilled_ptr.id);
747+
state->stack[spi].spilled_ptr.id = 0;
748+
state->stack[spi - 1].spilled_ptr.id = 0;
749+
}
750+
724751
state->stack[spi].spilled_ptr.dynptr.first_slot = false;
725752
state->stack[spi].spilled_ptr.dynptr.type = 0;
726753
state->stack[spi - 1].spilled_ptr.dynptr.type = 0;
@@ -5859,7 +5886,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
58595886

58605887
skip_type_check:
58615888
if (arg_type_is_release(arg_type)) {
5862-
if (!reg->ref_obj_id && !register_is_null(reg)) {
5889+
if (arg_type_is_dynptr(arg_type)) {
5890+
struct bpf_func_state *state = func(env, reg);
5891+
int spi = get_spi(reg->off);
5892+
5893+
if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
5894+
!state->stack[spi].spilled_ptr.id) {
5895+
verbose(env, "arg %d is an unacquired reference\n", regno);
5896+
return -EINVAL;
5897+
}
5898+
} else if (!reg->ref_obj_id && !register_is_null(reg)) {
58635899
verbose(env, "R%d must be referenced when passed to release function\n",
58645900
regno);
58655901
return -EINVAL;
@@ -5994,9 +6030,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
59946030
case DYNPTR_TYPE_LOCAL:
59956031
err_extra = "local ";
59966032
break;
6033+
case DYNPTR_TYPE_RINGBUF:
6034+
err_extra = "ringbuf ";
6035+
break;
59976036
default:
59986037
break;
59996038
}
6039+
60006040
verbose(env, "Expected an initialized %sdynptr as arg #%d\n",
60016041
err_extra, arg + 1);
60026042
return -EINVAL;
@@ -6122,7 +6162,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
61226162
case BPF_MAP_TYPE_RINGBUF:
61236163
if (func_id != BPF_FUNC_ringbuf_output &&
61246164
func_id != BPF_FUNC_ringbuf_reserve &&
6125-
func_id != BPF_FUNC_ringbuf_query)
6165+
func_id != BPF_FUNC_ringbuf_query &&
6166+
func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
6167+
func_id != BPF_FUNC_ringbuf_submit_dynptr &&
6168+
func_id != BPF_FUNC_ringbuf_discard_dynptr)
61266169
goto error;
61276170
break;
61286171
case BPF_MAP_TYPE_STACK_TRACE:
@@ -6238,6 +6281,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
62386281
case BPF_FUNC_ringbuf_output:
62396282
case BPF_FUNC_ringbuf_reserve:
62406283
case BPF_FUNC_ringbuf_query:
6284+
case BPF_FUNC_ringbuf_reserve_dynptr:
6285+
case BPF_FUNC_ringbuf_submit_dynptr:
6286+
case BPF_FUNC_ringbuf_discard_dynptr:
62416287
if (map->map_type != BPF_MAP_TYPE_RINGBUF)
62426288
goto error;
62436289
break;

tools/include/uapi/linux/bpf.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5189,6 +5189,38 @@ union bpf_attr {
51895189
* Return
51905190
* 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE,
51915191
* -EINVAL if flags is not 0.
5192+
*
5193+
* long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr)
5194+
* Description
5195+
* Reserve *size* bytes of payload in a ring buffer *ringbuf*
5196+
* through the dynptr interface. *flags* must be 0.
5197+
*
5198+
* Please note that a corresponding bpf_ringbuf_submit_dynptr or
5199+
* bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the
5200+
* reservation fails. This is enforced by the verifier.
5201+
* Return
5202+
* 0 on success, or a negative error in case of failure.
5203+
*
5204+
* void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags)
5205+
* Description
5206+
* Submit reserved ring buffer sample, pointed to by *ptr*,
5207+
* through the dynptr interface. This is a no-op if the dynptr is
5208+
* invalid/null.
5209+
*
5210+
* For more information on *flags*, please see
5211+
* 'bpf_ringbuf_submit'.
5212+
* Return
5213+
* Nothing. Always succeeds.
5214+
*
5215+
* void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags)
5216+
* Description
5217+
* Discard reserved ring buffer sample through the dynptr
5218+
* interface. This is a no-op if the dynptr is invalid/null.
5219+
*
5220+
* For more information on *flags*, please see
5221+
* 'bpf_ringbuf_discard'.
5222+
* Return
5223+
* Nothing. Always succeeds.
51925224
*/
51935225
#define __BPF_FUNC_MAPPER(FN) \
51945226
FN(unspec), \
@@ -5389,6 +5421,9 @@ union bpf_attr {
53895421
FN(map_lookup_percpu_elem), \
53905422
FN(skc_to_mptcp_sock), \
53915423
FN(dynptr_from_mem), \
5424+
FN(ringbuf_reserve_dynptr), \
5425+
FN(ringbuf_submit_dynptr), \
5426+
FN(ringbuf_discard_dynptr), \
53925427
/* */
53935428

53945429
/* integer value in 'imm' field of BPF_CALL instruction selects which helper

0 commit comments

Comments
 (0)