Skip to content

Commit b00c3ce

Browse files
committed
Merge: netfilter: nf_conntrack: fix crash due to removal of uninitialised entry
MR: https://gitlab.com/redhat/rhel/src/kernel/rhel-10/-/merge_requests/281
JIRA: https://issues.redhat.com/browse/RHEL-106441
CVE: CVE-2025-38472
Upstream Status: linux.git

There is a race in conntrack where an entry that has already been unlinked from the hashlist can be unlinked again. The other changes in this MR extend the existing test case to exercise more code paths.

Signed-off-by: Florian Westphal <fwestpha@redhat.com>
Closes RHEL-106441
Approved-by: Murphy Zhou <xzhou@redhat.com>
Approved-by: Guillaume Nault <gnault@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: Julio Faracco <jfaracco@redhat.com>
2 parents f67b66c + 0e6ea24 commit b00c3ce

File tree

4 files changed

+111
-13
lines changed

4 files changed

+111
-13
lines changed

include/net/netfilter/nf_conntrack.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,8 +306,19 @@ static inline bool nf_ct_is_expired(const struct nf_conn *ct)
306306
/* use after obtaining a reference count */
307307
static inline bool nf_ct_should_gc(const struct nf_conn *ct)
308308
{
309-
return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) &&
310-
!nf_ct_is_dying(ct);
309+
if (!nf_ct_is_confirmed(ct))
310+
return false;
311+
312+
/* load ct->timeout after is_confirmed() test.
313+
* Pairs with __nf_conntrack_confirm() which:
314+
* 1. Increases ct->timeout value
315+
* 2. Inserts ct into rcu hlist
316+
* 3. Sets the confirmed bit
317+
* 4. Unlocks the hlist lock
318+
*/
319+
smp_acquire__after_ctrl_dep();
320+
321+
return nf_ct_is_expired(ct) && !nf_ct_is_dying(ct);
311322
}
312323

313324
#define NF_CT_DAY (86400 * HZ)

net/netfilter/nf_conntrack_core.c

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1126,6 +1126,12 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
11261126

11271127
hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
11281128
&nf_conntrack_hash[repl_idx]);
1129+
/* confirmed bit must be set after hlist add, not before:
1130+
* loser_ct can still be visible to other cpu due to
1131+
* SLAB_TYPESAFE_BY_RCU.
1132+
*/
1133+
smp_mb__before_atomic();
1134+
set_bit(IPS_CONFIRMED_BIT, &loser_ct->status);
11291135

11301136
NF_CT_STAT_INC(net, clash_resolve);
11311137
return NF_ACCEPT;
@@ -1262,8 +1268,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
12621268
* user context, else we insert an already 'dead' hash, blocking
12631269
* further use of that particular connection -JM.
12641270
*/
1265-
ct->status |= IPS_CONFIRMED;
1266-
12671271
if (unlikely(nf_ct_is_dying(ct))) {
12681272
NF_CT_STAT_INC(net, insert_failed);
12691273
goto dying;
@@ -1295,19 +1299,29 @@ __nf_conntrack_confirm(struct sk_buff *skb)
12951299
}
12961300
}
12971301

1298-
/* Timer relative to confirmation time, not original
1302+
/* Timeout is relative to confirmation time, not original
12991303
setting time, otherwise we'd get timer wrap in
13001304
weird delay cases. */
13011305
ct->timeout += nfct_time_stamp;
13021306

13031307
__nf_conntrack_insert_prepare(ct);
13041308

13051309
/* Since the lookup is lockless, hash insertion must be done after
1306-
* starting the timer and setting the CONFIRMED bit. The RCU barriers
1307-
* guarantee that no other CPU can find the conntrack before the above
1308-
* stores are visible.
1310+
* setting ct->timeout. The RCU barriers guarantee that no other CPU
1311+
* can find the conntrack before the above stores are visible.
13091312
*/
13101313
__nf_conntrack_hash_insert(ct, hash, reply_hash);
1314+
1315+
/* IPS_CONFIRMED unset means 'ct not (yet) in hash', conntrack lookups
1316+
* skip entries that lack this bit. This happens when a CPU is looking
1317+
* at a stale entry that is being recycled due to SLAB_TYPESAFE_BY_RCU
1318+
* or when another CPU encounters this entry right after the insertion
1319+
* but before the set-confirm-bit below. This bit must not be set until
1320+
* after __nf_conntrack_hash_insert().
1321+
*/
1322+
smp_mb__before_atomic();
1323+
set_bit(IPS_CONFIRMED_BIT, &ct->status);
1324+
13111325
nf_conntrack_double_unlock(hash, reply_hash);
13121326
local_bh_enable();
13131327

net/netfilter/nf_tables_trace.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ static int nf_trace_fill_ct_info(struct sk_buff *nlskb,
127127
if (nla_put_be32(nlskb, NFTA_TRACE_CT_ID, (__force __be32)id))
128128
return -1;
129129

130+
/* Kernel implementation detail, withhold this from userspace for now */
131+
status &= ~IPS_NAT_CLASH;
132+
130133
if (status && nla_put_be32(nlskb, NFTA_TRACE_CT_STATUS, htonl(status)))
131134
return -1;
132135
}

tools/testing/selftests/net/netfilter/conntrack_resize.sh

Lines changed: 75 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ tmpfile=""
1212
tmpfile_proc=""
1313
tmpfile_uniq=""
1414
ret=0
15+
have_socat=0
16+
17+
socat -h > /dev/null && have_socat=1
1518

1619
insert_count=2000
1720
[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400
@@ -123,7 +126,7 @@ ctflush() {
123126
done
124127
}
125128

126-
ctflood()
129+
ct_pingflood()
127130
{
128131
local ns="$1"
129132
local duration="$2"
@@ -152,6 +155,28 @@ ctflood()
152155
wait
153156
}
154157

158+
ct_udpflood()
159+
{
160+
local ns="$1"
161+
local duration="$2"
162+
local now=$(date +%s)
163+
local end=$((now + duration))
164+
165+
[ $have_socat -ne "1" ] && return
166+
167+
while [ $now -lt $end ]; do
168+
ip netns exec "$ns" bash<<"EOF"
169+
for i in $(seq 1 100);do
170+
dport=$(((RANDOM%65536)+1))
171+
172+
echo bar | socat -u STDIN UDP:"127.0.0.1:$dport" &
173+
done > /dev/null 2>&1
174+
wait
175+
EOF
176+
now=$(date +%s)
177+
done
178+
}
179+
155180
# dump to /dev/null. We don't want dumps to cause infinite loops
156181
# or use-after-free even when conntrack table is altered while dumps
157182
# are in progress.
@@ -169,6 +194,48 @@ ct_nulldump()
169194
wait
170195
}
171196

197+
ct_nulldump_loop()
198+
{
199+
local ns="$1"
200+
local duration="$2"
201+
local now=$(date +%s)
202+
local end=$((now + duration))
203+
204+
while [ $now -lt $end ]; do
205+
ct_nulldump "$ns"
206+
sleep $((RANDOM%2))
207+
now=$(date +%s)
208+
done
209+
}
210+
211+
change_timeouts()
212+
{
213+
local ns="$1"
214+
local r1=$((RANDOM%2))
215+
local r2=$((RANDOM%2))
216+
217+
[ "$r1" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5))
218+
[ "$r2" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5))
219+
}
220+
221+
ct_change_timeouts_loop()
222+
{
223+
local ns="$1"
224+
local duration="$2"
225+
local now=$(date +%s)
226+
local end=$((now + duration))
227+
228+
while [ $now -lt $end ]; do
229+
change_timeouts "$ns"
230+
sleep $((RANDOM%2))
231+
now=$(date +%s)
232+
done
233+
234+
# restore defaults
235+
ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=30
236+
ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=30
237+
}
238+
172239
check_taint()
173240
{
174241
local tainted_then="$1"
@@ -198,10 +265,13 @@ insert_flood()
198265

199266
r=$((RANDOM%$insert_count))
200267

201-
ctflood "$n" "$timeout" "floodresize" &
268+
ct_pingflood "$n" "$timeout" "floodresize" &
269+
ct_udpflood "$n" "$timeout" &
270+
202271
insert_ctnetlink "$n" "$r" &
203272
ctflush "$n" "$timeout" &
204-
ct_nulldump "$n" &
273+
ct_nulldump_loop "$n" "$timeout" &
274+
ct_change_timeouts_loop "$n" "$timeout" &
205275

206276
wait
207277
}
@@ -306,7 +376,7 @@ test_dump_all()
306376

307377
ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600
308378

309-
ctflood "$nsclient1" $timeout "dumpall" &
379+
ct_pingflood "$nsclient1" $timeout "dumpall" &
310380
insert_ctnetlink "$nsclient2" $insert_count
311381

312382
wait
@@ -368,7 +438,7 @@ test_conntrack_disable()
368438
ct_flush_once "$nsclient1"
369439
ct_flush_once "$nsclient2"
370440

371-
ctflood "$nsclient1" "$timeout" "conntrack disable"
441+
ct_pingflood "$nsclient1" "$timeout" "conntrack disable"
372442
ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1
373443

374444
# Disabled, should not have picked up any connection.

0 commit comments

Comments
 (0)