Skip to content

Commit e9b6f5c

Browse files
committed
bpf: Add bpf_sock_destroy kfunc
JIRA: https://issues.redhat.com/browse/RHEL-65787 Conflicts: Context difference due to missing af9784d ("tcp: diag: add support for TIME_WAIT sockets to tcp_abort()") and out-of-order backport of bac76cf ("tcp: fix forever orphan socket caused by tcp_abort") commit 4ddbcb8 Author: Aditi Ghag <aditi.ghag@isovalent.com> Date: Fri May 19 22:51:55 2023 +0000 bpf: Add bpf_sock_destroy kfunc The socket destroy kfunc is used to forcefully terminate sockets from certain BPF contexts. We plan to use the capability in Cilium load-balancing to terminate client sockets that continue to connect to deleted backends. The other use case is on-the-fly policy enforcement where existing socket connections prevented by policies need to be forcefully terminated. The kfunc also allows terminating sockets that may or may not be actively sending traffic. The kfunc can currently be called only from BPF TCP and UDP iterators where users can filter, and terminate selected sockets. More specifically, it can only be called from BPF contexts that ensure socket locking in order to allow synchronous execution of protocol specific `diag_destroy` handlers. The previous commit that batches UDP sockets during iteration facilitated a synchronous invocation of the UDP destroy callback from BPF context by skipping socket locks in `udp_abort`. TCP iterator already supported batching of sockets being iterated. To that end, `tracing_iter_filter` callback filter is added so that verifier can restrict the kfunc to programs with `BPF_TRACE_ITER` attach type, and reject other programs. The kfunc takes `sock_common` type argument, even though it expects, and casts them to a `sock` pointer. This enables the verifier to allow the sock_destroy kfunc to be called for TCP with `sock_common` and UDP with `sock` structs. 
Furthermore, as `sock_common` only has a subset of certain fields of `sock`, casting a pointer to the latter type might not always be safe for certain sockets like request sockets, but these have special handling in the diag_destroy handlers. Additionally, the kfunc is defined with the `KF_TRUSTED_ARGS` flag to avoid the cases where a `PTR_TO_BTF_ID` sk is obtained by following another pointer, e.g., getting a sk pointer (which may even be NULL) by following another sk pointer. The pointer socket argument passed in TCP and UDP iterators is tagged as `PTR_TRUSTED` in {tcp,udp}_reg_info. The TRUSTED arg changes are contributed by Martin KaFai Lau <martin.lau@kernel.org>. Signed-off-by: Aditi Ghag <aditi.ghag@isovalent.com> Link: https://lore.kernel.org/r/20230519225157.760788-8-aditi.ghag@isovalent.com Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
1 parent 692dba9 commit e9b6f5c

File tree

4 files changed

+75
-7
lines changed

4 files changed

+75
-7
lines changed

net/core/filter.c

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11891,3 +11891,66 @@ static int __init bpf_kfunc_init(void)
1189111891
return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
1189211892
}
1189311893
late_initcall(bpf_kfunc_init);
11894+
11895+
/* Disables missing prototype warnings */
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
		  "Global functions as their definitions will be in vmlinux BTF");

/* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code.
 *
 * The function expects a non-NULL pointer to a socket, and invokes the
 * protocol specific socket destroy handlers.
 *
 * The helper can only be called from BPF contexts that have acquired the socket
 * locks.
 *
 * Parameters:
 * @sock: Pointer to socket to be destroyed
 *
 * Return:
 * EOPNOTSUPP if the protocol has no diag_destroy handler, or the socket is
 * neither TCP nor UDP (only those protocols guarantee the locking semantics
 * this kfunc relies on).
 * Otherwise, the return value of the protocol's diag_destroy handler
 * (0 on success; a handler-specific errno such as EINVAL on failure).
 */
__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
{
	/* The argument is declared as sock_common so the verifier accepts both
	 * the TCP iterator's sock_common and the UDP iterator's sock pointers;
	 * the diag_destroy handlers take care of sockets (e.g. request socks)
	 * that are not full struct sock objects.
	 */
	struct sock *sk = (struct sock *)sock;

	/* The locking semantics that allow for synchronous execution of the
	 * destroy handlers are only supported for TCP and UDP.
	 * Supporting protocols will need to acquire sock lock in the BPF context
	 * prior to invoking this kfunc.
	 */
	if (!sk->sk_prot->diag_destroy || (sk->sk_protocol != IPPROTO_TCP &&
					   sk->sk_protocol != IPPROTO_UDP))
		return -EOPNOTSUPP;

	return sk->sk_prot->diag_destroy(sk, ECONNABORTED);
}

__diag_pop()
11933+
11934+
/* BTF ID set of kfuncs callable from socket iterators. KF_TRUSTED_ARGS
 * restricts bpf_sock_destroy to trusted (PTR_TRUSTED) socket pointers,
 * i.e. the iterator context argument, not pointers walked out of other
 * objects.
 */
BTF_SET8_START(bpf_sk_iter_kfunc_ids)
BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
BTF_SET8_END(bpf_sk_iter_kfunc_ids)

/* Verifier filter: only BPF_TRACE_ITER programs may call the kfuncs in
 * bpf_sk_iter_kfunc_ids, since only the TCP/UDP iterators hold the socket
 * lock required by the diag_destroy handlers. Returns -EACCES for any
 * other tracing attach type, 0 to allow.
 */
static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
	if (btf_id_set8_contains(&bpf_sk_iter_kfunc_ids, kfunc_id) &&
	    prog->expected_attach_type != BPF_TRACE_ITER)
		return -EACCES;
	return 0;
}

static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &bpf_sk_iter_kfunc_ids,
	.filter = tracing_iter_filter,
};

/* Register the socket-iterator kfunc set for tracing programs at boot. */
static int init_subsystem(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sk_iter_kfunc_set);
}
late_initcall(init_subsystem);

net/ipv4/tcp.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4716,8 +4716,10 @@ int tcp_abort(struct sock *sk, int err)
47164716
return -EOPNOTSUPP;
47174717
}
47184718

4719-
/* Don't race with userspace socket closes such as tcp_close. */
4720-
lock_sock(sk);
4719+
/* BPF context ensures sock locking. */
4720+
if (!has_current_bpf_ctx())
4721+
/* Don't race with userspace socket closes such as tcp_close. */
4722+
lock_sock(sk);
47214723

47224724
/* Avoid closing the same socket twice. */
47234725
if (sk->sk_state == TCP_CLOSE) {
@@ -4745,7 +4747,8 @@ int tcp_abort(struct sock *sk, int err)
47454747

47464748
bh_unlock_sock(sk);
47474749
local_bh_enable();
4748-
release_sock(sk);
4750+
if (!has_current_bpf_ctx())
4751+
release_sock(sk);
47494752
return 0;
47504753
}
47514754
EXPORT_SYMBOL_GPL(tcp_abort);

net/ipv4/tcp_ipv4.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3302,7 +3302,7 @@ static struct bpf_iter_reg tcp_reg_info = {
33023302
.ctx_arg_info_size = 1,
33033303
.ctx_arg_info = {
33043304
{ offsetof(struct bpf_iter__tcp, sk_common),
3305-
PTR_TO_BTF_ID_OR_NULL },
3305+
PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
33063306
},
33073307
.get_func_proto = bpf_iter_tcp_get_func_proto,
33083308
.seq_info = &tcp_seq_info,

net/ipv4/udp.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2938,7 +2938,8 @@ EXPORT_SYMBOL(udp_poll);
29382938

29392939
int udp_abort(struct sock *sk, int err)
29402940
{
2941-
lock_sock(sk);
2941+
if (!has_current_bpf_ctx())
2942+
lock_sock(sk);
29422943

29432944
/* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
29442945
* with close()
@@ -2951,7 +2952,8 @@ int udp_abort(struct sock *sk, int err)
29512952
__udp_disconnect(sk, 0);
29522953

29532954
out:
2954-
release_sock(sk);
2955+
if (!has_current_bpf_ctx())
2956+
release_sock(sk);
29552957

29562958
return 0;
29572959
}
@@ -3563,7 +3565,7 @@ static struct bpf_iter_reg udp_reg_info = {
35633565
.ctx_arg_info_size = 1,
35643566
.ctx_arg_info = {
35653567
{ offsetof(struct bpf_iter__udp, udp_sk),
3566-
PTR_TO_BTF_ID_OR_NULL },
3568+
PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
35673569
},
35683570
.seq_info = &udp_seq_info,
35693571
};

0 commit comments

Comments
 (0)