Skip to content

Commit f08c256

Browse files
author
Sabrina Dubroca
committed
espintcp: remove encap socket caching to avoid reference leak
JIRA: https://issues.redhat.com/browse/RHEL-115629 commit 0283636 Author: Sabrina Dubroca <sd@queasysnail.net> Date: Wed Apr 9 15:59:57 2025 +0200 espintcp: remove encap socket caching to avoid reference leak The current scheme for caching the encap socket can lead to reference leaks when we try to delete the netns. The reference chain is: xfrm_state -> enacp_sk -> netns Since the encap socket is a userspace socket, it holds a reference on the netns. If we delete the espintcp state (through flush or individual delete) before removing the netns, the reference on the socket is dropped and the netns is correctly deleted. Otherwise, the netns may not be reachable anymore (if all processes within the ns have terminated), so we cannot delete the xfrm state to drop its reference on the socket. This patch results in a small (~2% in my tests) performance regression. A GC-type mechanism could be added for the socket cache, to clear references if the state hasn't been used "recently", but it's a lot more complex than just not caching the socket. Fixes: e27cca9 ("xfrm: add espintcp (RFC 8229)") Signed-off-by: Sabrina Dubroca <sd@queasysnail.net> Reviewed-by: Simon Horman <horms@kernel.org> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> Signed-off-by: Sabrina Dubroca <sdubroca@redhat.com>
1 parent c0309de commit f08c256

File tree

4 files changed

+8
-94
lines changed

4 files changed

+8
-94
lines changed

include/net/xfrm.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,6 @@ struct xfrm_state {
239239

240240
/* Data for encapsulator */
241241
struct xfrm_encap_tmpl *encap;
242-
struct sock __rcu *encap_sk;
243242

244243
/* Data for care-of address */
245244
xfrm_address_t *coaddr;

net/ipv4/esp4.c

Lines changed: 4 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -120,47 +120,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
120120
}
121121

122122
#ifdef CONFIG_INET_ESPINTCP
123-
struct esp_tcp_sk {
124-
struct sock *sk;
125-
struct rcu_head rcu;
126-
};
127-
128-
static void esp_free_tcp_sk(struct rcu_head *head)
129-
{
130-
struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
131-
132-
sock_put(esk->sk);
133-
kfree(esk);
134-
}
135-
136123
static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
137124
{
138125
struct xfrm_encap_tmpl *encap = x->encap;
139126
struct net *net = xs_net(x);
140-
struct esp_tcp_sk *esk;
141127
__be16 sport, dport;
142-
struct sock *nsk;
143128
struct sock *sk;
144129

145-
sk = rcu_dereference(x->encap_sk);
146-
if (sk && sk->sk_state == TCP_ESTABLISHED)
147-
return sk;
148-
149130
spin_lock_bh(&x->lock);
150131
sport = encap->encap_sport;
151132
dport = encap->encap_dport;
152-
nsk = rcu_dereference_protected(x->encap_sk,
153-
lockdep_is_held(&x->lock));
154-
if (sk && sk == nsk) {
155-
esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
156-
if (!esk) {
157-
spin_unlock_bh(&x->lock);
158-
return ERR_PTR(-ENOMEM);
159-
}
160-
RCU_INIT_POINTER(x->encap_sk, NULL);
161-
esk->sk = sk;
162-
call_rcu(&esk->rcu, esp_free_tcp_sk);
163-
}
164133
spin_unlock_bh(&x->lock);
165134

166135
sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4,
@@ -173,20 +142,6 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
173142
return ERR_PTR(-EINVAL);
174143
}
175144

176-
spin_lock_bh(&x->lock);
177-
nsk = rcu_dereference_protected(x->encap_sk,
178-
lockdep_is_held(&x->lock));
179-
if (encap->encap_sport != sport ||
180-
encap->encap_dport != dport) {
181-
sock_put(sk);
182-
sk = nsk ?: ERR_PTR(-EREMCHG);
183-
} else if (sk == nsk) {
184-
sock_put(sk);
185-
} else {
186-
rcu_assign_pointer(x->encap_sk, sk);
187-
}
188-
spin_unlock_bh(&x->lock);
189-
190145
return sk;
191146
}
192147

@@ -211,6 +166,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
211166
err = espintcp_push_skb(sk, skb);
212167
bh_unlock_sock(sk);
213168

169+
sock_put(sk);
170+
214171
out:
215172
rcu_read_unlock();
216173
return err;
@@ -396,6 +353,8 @@ static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
396353
if (IS_ERR(sk))
397354
return ERR_CAST(sk);
398355

356+
sock_put(sk);
357+
399358
*lenp = htons(len);
400359
esph = (struct ip_esp_hdr *)(lenp + 1);
401360

net/ipv6/esp6.c

Lines changed: 4 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -136,47 +136,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
136136
}
137137

138138
#ifdef CONFIG_INET6_ESPINTCP
139-
struct esp_tcp_sk {
140-
struct sock *sk;
141-
struct rcu_head rcu;
142-
};
143-
144-
static void esp_free_tcp_sk(struct rcu_head *head)
145-
{
146-
struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
147-
148-
sock_put(esk->sk);
149-
kfree(esk);
150-
}
151-
152139
static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
153140
{
154141
struct xfrm_encap_tmpl *encap = x->encap;
155142
struct net *net = xs_net(x);
156-
struct esp_tcp_sk *esk;
157143
__be16 sport, dport;
158-
struct sock *nsk;
159144
struct sock *sk;
160145

161-
sk = rcu_dereference(x->encap_sk);
162-
if (sk && sk->sk_state == TCP_ESTABLISHED)
163-
return sk;
164-
165146
spin_lock_bh(&x->lock);
166147
sport = encap->encap_sport;
167148
dport = encap->encap_dport;
168-
nsk = rcu_dereference_protected(x->encap_sk,
169-
lockdep_is_held(&x->lock));
170-
if (sk && sk == nsk) {
171-
esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
172-
if (!esk) {
173-
spin_unlock_bh(&x->lock);
174-
return ERR_PTR(-ENOMEM);
175-
}
176-
RCU_INIT_POINTER(x->encap_sk, NULL);
177-
esk->sk = sk;
178-
call_rcu(&esk->rcu, esp_free_tcp_sk);
179-
}
180149
spin_unlock_bh(&x->lock);
181150

182151
sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6,
@@ -189,20 +158,6 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
189158
return ERR_PTR(-EINVAL);
190159
}
191160

192-
spin_lock_bh(&x->lock);
193-
nsk = rcu_dereference_protected(x->encap_sk,
194-
lockdep_is_held(&x->lock));
195-
if (encap->encap_sport != sport ||
196-
encap->encap_dport != dport) {
197-
sock_put(sk);
198-
sk = nsk ?: ERR_PTR(-EREMCHG);
199-
} else if (sk == nsk) {
200-
sock_put(sk);
201-
} else {
202-
rcu_assign_pointer(x->encap_sk, sk);
203-
}
204-
spin_unlock_bh(&x->lock);
205-
206161
return sk;
207162
}
208163

@@ -227,6 +182,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
227182
err = espintcp_push_skb(sk, skb);
228183
bh_unlock_sock(sk);
229184

185+
sock_put(sk);
186+
230187
out:
231188
rcu_read_unlock();
232189
return err;
@@ -431,6 +388,8 @@ static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
431388
if (IS_ERR(sk))
432389
return ERR_CAST(sk);
433390

391+
sock_put(sk);
392+
434393
*lenp = htons(len);
435394
esph = (struct ip_esp_hdr *)(lenp + 1);
436395

net/xfrm/xfrm_state.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -764,9 +764,6 @@ int __xfrm_state_delete(struct xfrm_state *x)
764764
net->xfrm.state_num--;
765765
spin_unlock(&net->xfrm.xfrm_state_lock);
766766

767-
if (x->encap_sk)
768-
sock_put(rcu_dereference_raw(x->encap_sk));
769-
770767
xfrm_dev_state_delete(x);
771768

772769
/* All xfrm_state objects are created by xfrm_state_alloc.

0 commit comments

Comments
 (0)