Skip to content

Commit d74bad4

Browse files
rdna authored and borkmann committed
bpf: Hooks for sys_connect
== The problem ==

See the description of the problem in the initial patch of this patch set.

== The solution ==

The patch provides a much more reliable in-kernel solution for the 2nd part of the problem: making an outgoing connection from a desired IP.

It adds new attach types `BPF_CGROUP_INET4_CONNECT` and `BPF_CGROUP_INET6_CONNECT` for program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` that can be used to override both the source and the destination of a connection at connect(2) time.

The local end of the connection can be bound to a desired IP using the newly introduced BPF helper `bpf_bind()`. It only allows binding to an IP, though, and doesn't support binding to a port, i.e. it leverages the `IP_BIND_ADDRESS_NO_PORT` socket option. There are two reasons for this:
* looking for a free port is expensive and can affect performance significantly;
* there is no use case for binding to a port.

As for the remote end (the `struct sockaddr *` passed by the user), both parts of it can be overridden: the remote IP and the remote port. This is useful if an application inside a cgroup wants to connect to another application inside the same cgroup, or to itself, but knows nothing about the IP assigned to the cgroup.

Support is added for IPv4 and IPv6, for TCP and UDP.

IPv4 and IPv6 have separate attach types for the same reason as the sys_bind hooks, i.e. to prevent reading from / writing to e.g. the user_ip6 fields when the user passes a sockaddr_in, since that would be out of bounds.

== Implementation notes ==

The patch introduces a new field in `struct proto`: `pre_connect`, a pointer to a function with the same signature as `connect` that is called before it. The reason is that in some cases BPF hooks should be called well before control is passed to `sk->sk_prot->connect`. Specifically, `inet_dgram_connect` autobinds the socket before calling `sk->sk_prot->connect`, so there is no way to call `bpf_bind()` from hooks placed in e.g. `ip4_datagram_connect` or `ip6_datagram_connect`, since it would cause a double bind.
On the other hand, `proto.pre_connect` provides a flexible way to add BPF hooks for connect only for the necessary `proto` and to call them at the desired time before `connect`. Since `bpf_bind()` is allowed to bind only to an IP and the autobind in `inet_dgram_connect` binds only a port, there is no chance of a double bind.

`bpf_bind()` sets `force_bind_address_no_port` to bind to only an IP regardless of the value of the `bind_address_no_port` socket field.

`bpf_bind()` sets `with_lock` to `false` when calling `__inet_bind()` and `__inet6_bind()`, since all call sites where `bpf_bind()` is called already hold the socket lock.

Signed-off-by: Andrey Ignatov <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
1 parent 3679d58 commit d74bad4

File tree

13 files changed

+202
-1
lines changed

13 files changed

+202
-1
lines changed

include/linux/bpf-cgroup.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,12 +116,38 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
116116
__ret; \
117117
})
118118

119+
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \
120+
({ \
121+
int __ret = 0; \
122+
if (cgroup_bpf_enabled) { \
123+
lock_sock(sk); \
124+
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
125+
release_sock(sk); \
126+
} \
127+
__ret; \
128+
})
129+
119130
#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \
120131
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
121132

122133
#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \
123134
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
124135

136+
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
137+
sk->sk_prot->pre_connect)
138+
139+
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
140+
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
141+
142+
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
143+
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
144+
145+
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
146+
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
147+
148+
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
149+
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
150+
125151
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
126152
({ \
127153
int __ret = 0; \
@@ -151,11 +177,16 @@ struct cgroup_bpf {};
151177
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
152178
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
153179

180+
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
154181
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
155182
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
156183
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
157184
#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
158185
#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
186+
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
187+
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
188+
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
189+
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
159190
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
160191
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
161192

include/net/addrconf.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,13 @@ struct ipv6_stub {
231231
};
232232
extern const struct ipv6_stub *ipv6_stub __read_mostly;
233233

234+
/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
235+
struct ipv6_bpf_stub {
236+
int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
237+
bool force_bind_address_no_port, bool with_lock);
238+
};
239+
extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
240+
234241
/*
235242
* identify MLD packets for MLD filter exceptions
236243
*/

include/net/sock.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,9 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size)
10261026
struct proto {
10271027
void (*close)(struct sock *sk,
10281028
long timeout);
1029+
int (*pre_connect)(struct sock *sk,
1030+
struct sockaddr *uaddr,
1031+
int addr_len);
10291032
int (*connect)(struct sock *sk,
10301033
struct sockaddr *uaddr,
10311034
int addr_len);

include/net/udp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
273273
int udp_rcv(struct sk_buff *skb);
274274
int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
275275
int udp_init_sock(struct sock *sk);
276+
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
276277
int __udp_disconnect(struct sock *sk, int flags);
277278
int udp_disconnect(struct sock *sk, int flags);
278279
__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);

include/uapi/linux/bpf.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ enum bpf_attach_type {
150150
BPF_SK_MSG_VERDICT,
151151
BPF_CGROUP_INET4_BIND,
152152
BPF_CGROUP_INET6_BIND,
153+
BPF_CGROUP_INET4_CONNECT,
154+
BPF_CGROUP_INET6_CONNECT,
153155
__MAX_BPF_ATTACH_TYPE
154156
};
155157

@@ -744,6 +746,13 @@ union bpf_attr {
744746
* @flags: reserved for future use
745747
* Return: SK_PASS
746748
*
749+
* int bpf_bind(ctx, addr, addr_len)
750+
* Bind socket to address. Only binding to IP is supported, no port can be
751+
* set in addr.
752+
* @ctx: pointer to context of type bpf_sock_addr
753+
* @addr: pointer to struct sockaddr to bind socket to
754+
* @addr_len: length of sockaddr structure
755+
* Return: 0 on success or negative error code
747756
*/
748757
#define __BPF_FUNC_MAPPER(FN) \
749758
FN(unspec), \
@@ -809,7 +818,8 @@ union bpf_attr {
809818
FN(msg_redirect_map), \
810819
FN(msg_apply_bytes), \
811820
FN(msg_cork_bytes), \
812-
FN(msg_pull_data),
821+
FN(msg_pull_data), \
822+
FN(bind),
813823

814824
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
815825
* function eBPF program intends to call

kernel/bpf/syscall.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
11801180
switch (expected_attach_type) {
11811181
case BPF_CGROUP_INET4_BIND:
11821182
case BPF_CGROUP_INET6_BIND:
1183+
case BPF_CGROUP_INET4_CONNECT:
1184+
case BPF_CGROUP_INET6_CONNECT:
11831185
return 0;
11841186
default:
11851187
return -EINVAL;
@@ -1491,6 +1493,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
14911493
break;
14921494
case BPF_CGROUP_INET4_BIND:
14931495
case BPF_CGROUP_INET6_BIND:
1496+
case BPF_CGROUP_INET4_CONNECT:
1497+
case BPF_CGROUP_INET6_CONNECT:
14941498
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
14951499
break;
14961500
case BPF_CGROUP_SOCK_OPS:
@@ -1557,6 +1561,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
15571561
break;
15581562
case BPF_CGROUP_INET4_BIND:
15591563
case BPF_CGROUP_INET6_BIND:
1564+
case BPF_CGROUP_INET4_CONNECT:
1565+
case BPF_CGROUP_INET6_CONNECT:
15601566
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
15611567
break;
15621568
case BPF_CGROUP_SOCK_OPS:
@@ -1610,6 +1616,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
16101616
case BPF_CGROUP_INET_SOCK_CREATE:
16111617
case BPF_CGROUP_INET4_BIND:
16121618
case BPF_CGROUP_INET6_BIND:
1619+
case BPF_CGROUP_INET4_CONNECT:
1620+
case BPF_CGROUP_INET6_CONNECT:
16131621
case BPF_CGROUP_SOCK_OPS:
16141622
case BPF_CGROUP_DEVICE:
16151623
break;

net/core/filter.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <linux/if_packet.h>
3434
#include <linux/if_arp.h>
3535
#include <linux/gfp.h>
36+
#include <net/inet_common.h>
3637
#include <net/ip.h>
3738
#include <net/protocol.h>
3839
#include <net/netlink.h>
@@ -3656,6 +3657,52 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
36563657
.arg2_type = ARG_ANYTHING,
36573658
};
36583659

3660+
const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
3661+
EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
3662+
3663+
BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
3664+
int, addr_len)
3665+
{
3666+
#ifdef CONFIG_INET
3667+
struct sock *sk = ctx->sk;
3668+
int err;
3669+
3670+
/* Binding to port can be expensive so it's prohibited in the helper.
3671+
* Only binding to IP is supported.
3672+
*/
3673+
err = -EINVAL;
3674+
if (addr->sa_family == AF_INET) {
3675+
if (addr_len < sizeof(struct sockaddr_in))
3676+
return err;
3677+
if (((struct sockaddr_in *)addr)->sin_port != htons(0))
3678+
return err;
3679+
return __inet_bind(sk, addr, addr_len, true, false);
3680+
#if IS_ENABLED(CONFIG_IPV6)
3681+
} else if (addr->sa_family == AF_INET6) {
3682+
if (addr_len < SIN6_LEN_RFC2133)
3683+
return err;
3684+
if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
3685+
return err;
3686+
/* ipv6_bpf_stub cannot be NULL, since it's called from
3687+
* bpf_cgroup_inet6_connect hook and ipv6 is already loaded
3688+
*/
3689+
return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
3690+
#endif /* CONFIG_IPV6 */
3691+
}
3692+
#endif /* CONFIG_INET */
3693+
3694+
return -EAFNOSUPPORT;
3695+
}
3696+
3697+
static const struct bpf_func_proto bpf_bind_proto = {
3698+
.func = bpf_bind,
3699+
.gpl_only = false,
3700+
.ret_type = RET_INTEGER,
3701+
.arg1_type = ARG_PTR_TO_CTX,
3702+
.arg2_type = ARG_PTR_TO_MEM,
3703+
.arg3_type = ARG_CONST_SIZE,
3704+
};
3705+
36593706
static const struct bpf_func_proto *
36603707
bpf_base_func_proto(enum bpf_func_id func_id)
36613708
{
@@ -3707,6 +3754,14 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
37073754
*/
37083755
case BPF_FUNC_get_current_uid_gid:
37093756
return &bpf_get_current_uid_gid_proto;
3757+
case BPF_FUNC_bind:
3758+
switch (prog->expected_attach_type) {
3759+
case BPF_CGROUP_INET4_CONNECT:
3760+
case BPF_CGROUP_INET6_CONNECT:
3761+
return &bpf_bind_proto;
3762+
default:
3763+
return NULL;
3764+
}
37103765
default:
37113766
return bpf_base_func_proto(func_id);
37123767
}
@@ -4213,6 +4268,7 @@ static bool sock_addr_is_valid_access(int off, int size,
42134268
case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
42144269
switch (prog->expected_attach_type) {
42154270
case BPF_CGROUP_INET4_BIND:
4271+
case BPF_CGROUP_INET4_CONNECT:
42164272
break;
42174273
default:
42184274
return false;
@@ -4221,6 +4277,7 @@ static bool sock_addr_is_valid_access(int off, int size,
42214277
case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
42224278
switch (prog->expected_attach_type) {
42234279
case BPF_CGROUP_INET6_BIND:
4280+
case BPF_CGROUP_INET6_CONNECT:
42244281
break;
42254282
default:
42264283
return false;

net/ipv4/af_inet.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,12 +547,19 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
547547
int addr_len, int flags)
548548
{
549549
struct sock *sk = sock->sk;
550+
int err;
550551

551552
if (addr_len < sizeof(uaddr->sa_family))
552553
return -EINVAL;
553554
if (uaddr->sa_family == AF_UNSPEC)
554555
return sk->sk_prot->disconnect(sk, flags);
555556

557+
if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
558+
err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
559+
if (err)
560+
return err;
561+
}
562+
556563
if (!inet_sk(sk)->inet_num && inet_autobind(sk))
557564
return -EAGAIN;
558565
return sk->sk_prot->connect(sk, uaddr, addr_len);
@@ -633,6 +640,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
633640
if (sk->sk_state != TCP_CLOSE)
634641
goto out;
635642

643+
if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
644+
err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
645+
if (err)
646+
goto out;
647+
}
648+
636649
err = sk->sk_prot->connect(sk, uaddr, addr_len);
637650
if (err < 0)
638651
goto out;

net/ipv4/tcp_ipv4.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,21 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
140140
}
141141
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
142142

143+
static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
144+
int addr_len)
145+
{
146+
/* This check is replicated from tcp_v4_connect() and intended to
147+
* prevent BPF program called below from accessing bytes that are out
148+
* of the bound specified by user in addr_len.
149+
*/
150+
if (addr_len < sizeof(struct sockaddr_in))
151+
return -EINVAL;
152+
153+
sock_owned_by_me(sk);
154+
155+
return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
156+
}
157+
143158
/* This will initiate an outgoing connection. */
144159
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
145160
{
@@ -2409,6 +2424,7 @@ struct proto tcp_prot = {
24092424
.name = "TCP",
24102425
.owner = THIS_MODULE,
24112426
.close = tcp_close,
2427+
.pre_connect = tcp_v4_pre_connect,
24122428
.connect = tcp_v4_connect,
24132429
.disconnect = tcp_disconnect,
24142430
.accept = inet_csk_accept,

net/ipv4/udp.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1658,6 +1658,19 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
16581658
goto try_again;
16591659
}
16601660

1661+
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1662+
{
1663+
/* This check is replicated from __ip4_datagram_connect() and
1664+
* intended to prevent BPF program called below from accessing bytes
1665+
* that are out of the bound specified by user in addr_len.
1666+
*/
1667+
if (addr_len < sizeof(struct sockaddr_in))
1668+
return -EINVAL;
1669+
1670+
return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
1671+
}
1672+
EXPORT_SYMBOL(udp_pre_connect);
1673+
16611674
int __udp_disconnect(struct sock *sk, int flags)
16621675
{
16631676
struct inet_sock *inet = inet_sk(sk);
@@ -2530,6 +2543,7 @@ struct proto udp_prot = {
25302543
.name = "UDP",
25312544
.owner = THIS_MODULE,
25322545
.close = udp_lib_close,
2546+
.pre_connect = udp_pre_connect,
25332547
.connect = ip4_datagram_connect,
25342548
.disconnect = udp_disconnect,
25352549
.ioctl = udp_ioctl,

net/ipv6/af_inet6.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
887887
.nd_tbl = &nd_tbl,
888888
};
889889

890+
static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
891+
.inet6_bind = __inet6_bind,
892+
};
893+
890894
static int __init inet6_init(void)
891895
{
892896
struct list_head *r;
@@ -1043,6 +1047,7 @@ static int __init inet6_init(void)
10431047
/* ensure that ipv6 stubs are visible only after ipv6 is ready */
10441048
wmb();
10451049
ipv6_stub = &ipv6_stub_impl;
1050+
ipv6_bpf_stub = &ipv6_bpf_stub_impl;
10461051
out:
10471052
return err;
10481053

net/ipv6/tcp_ipv6.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,21 @@ static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
117117
ipv6_hdr(skb)->saddr.s6_addr32);
118118
}
119119

120+
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
121+
int addr_len)
122+
{
123+
/* This check is replicated from tcp_v6_connect() and intended to
124+
* prevent BPF program called below from accessing bytes that are out
125+
* of the bound specified by user in addr_len.
126+
*/
127+
if (addr_len < SIN6_LEN_RFC2133)
128+
return -EINVAL;
129+
130+
sock_owned_by_me(sk);
131+
132+
return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
133+
}
134+
120135
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
121136
int addr_len)
122137
{
@@ -1925,6 +1940,7 @@ struct proto tcpv6_prot = {
19251940
.name = "TCPv6",
19261941
.owner = THIS_MODULE,
19271942
.close = tcp_close,
1943+
.pre_connect = tcp_v6_pre_connect,
19281944
.connect = tcp_v6_connect,
19291945
.disconnect = tcp_disconnect,
19301946
.accept = inet_csk_accept,

0 commit comments

Comments
 (0)