Skip to content

Commit aee8a36

Browse files
committed
Merge: netfilter: nf_nat: undo erroneous tcp edemux lookup after port clash
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/4971
JIRA: https://issues.redhat.com/browse/RHEL-6151
Upstream Status: all mainline
Signed-off-by: Florian Westphal <fwestpha@redhat.com>
Approved-by: Xin Long <lxin@redhat.com>
Approved-by: Phil Sutter <psutter@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: Lucas Zampieri <lzampier@redhat.com>
2 parents ebf3c5c + 9a5e153 commit aee8a36

File tree

2 files changed

+98
-12
lines changed

2 files changed

+98
-12
lines changed

net/netfilter/nf_nat_proto.c

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,31 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int
697697
}
698698
#endif
699699

700+
/*
 * Report whether the port recorded in the conntrack entry attached to @skb
 * differs from @sport.
 *
 * Returns false when @skb has no conntrack entry, when the tracked protocol
 * is neither TCP nor UDP, or when the packet is not in the original
 * direction. Otherwise returns true iff the destination port stored in the
 * opposite-direction tuple is not equal to @sport.
 *
 * @sport is compared as-is against a __be16 tuple field, so callers must
 * pass it in network byte order.
 */
static bool nf_nat_inet_port_was_mangled(const struct sk_buff *skb, __be16 sport)
701+
{
702+
enum ip_conntrack_info ctinfo;
703+
enum ip_conntrack_dir dir;
704+
const struct nf_conn *ct;
705+
706+
ct = nf_ct_get(skb, &ctinfo);
707+
if (!ct)
708+
return false;
709+
710+
switch (nf_ct_protonum(ct)) {
711+
case IPPROTO_TCP:
712+
case IPPROTO_UDP:
713+
break;
714+
default:
715+
return false;
716+
}
717+
718+
dir = CTINFO2DIR(ctinfo);
719+
if (dir != IP_CT_DIR_ORIGINAL)
720+
return false;
721+
722+
/* tuplehash[!dir] selects the tuple for the direction opposite to dir. */
return ct->tuplehash[!dir].tuple.dst.u.all != sport;
723+
}
724+
700725
static unsigned int
701726
nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
702727
const struct nf_hook_state *state)
@@ -707,8 +732,20 @@ nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
707732

708733
ret = nf_nat_ipv4_fn(priv, skb, state);
709734

710-
if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
711-
!inet_sk_transparent(sk))
735+
if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
736+
return ret;
737+
738+
/* skb has a socket assigned via tcp edemux. We need to check
739+
* if nf_nat_ipv4_fn() has mangled the packet in a way that
740+
* edemux would not have found this socket.
741+
*
742+
* This includes both changes to the source address and changes
743+
* to the source port, which are both handled by the
744+
* nf_nat_ipv4_fn() call above -- long after tcp/udp early demux
745+
* might have found a socket for the old (pre-snat) address.
746+
*/
747+
if (saddr != ip_hdr(skb)->saddr ||
748+
nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
712749
skb_orphan(skb); /* TCP edemux obtained wrong socket */
713750

714751
return ret;
@@ -937,6 +974,27 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
937974
return nf_nat_inet_fn(priv, skb, state);
938975
}
939976

977+
/*
 * IPv6 NAT hook wrapper, registered for NF_INET_LOCAL_IN in nf_nat_ipv6_ops[].
 *
 * Snapshots the IPv6 source address and the socket already attached to @skb,
 * then runs nf_nat_ipv6_fn(). If the verdict is NF_ACCEPT, a socket was
 * attached and that socket is not transparent, the skb is orphaned when
 * either the source address changed across the call or
 * nf_nat_inet_port_was_mangled() reports a port mismatch against the
 * socket's sk_dport.
 *
 * Returns the verdict from nf_nat_ipv6_fn() unchanged.
 */
static unsigned int
978+
nf_nat_ipv6_local_in(void *priv, struct sk_buff *skb,
979+
const struct nf_hook_state *state)
980+
{
981+
struct in6_addr saddr = ipv6_hdr(skb)->saddr;
982+
struct sock *sk = skb->sk;
983+
unsigned int ret;
984+
985+
ret = nf_nat_ipv6_fn(priv, skb, state);
986+
987+
if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk))
988+
return ret;
989+
990+
/* see nf_nat_ipv4_local_in */
991+
if (ipv6_addr_cmp(&saddr, &ipv6_hdr(skb)->saddr) ||
992+
nf_nat_inet_port_was_mangled(skb, sk->sk_dport))
993+
skb_orphan(skb);
994+
995+
return ret;
996+
}
997+
940998
static unsigned int
941999
nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
9421000
const struct nf_hook_state *state)
@@ -1052,7 +1110,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
10521110
},
10531111
/* After packet filtering, change source */
10541112
{
1055-
.hook = nf_nat_ipv6_fn,
1113+
.hook = nf_nat_ipv6_local_in,
10561114
.pf = NFPROTO_IPV6,
10571115
.hooknum = NF_INET_LOCAL_IN,
10581116
.priority = NF_IP6_PRI_NAT_SRC,

tools/testing/selftests/netfilter/nf_nat_edemux.sh

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,18 @@ ret=0
1111
sfx=$(mktemp -u "XXXXXXXX")
1212
ns1="ns1-$sfx"
1313
ns2="ns2-$sfx"
14+
socatpid=0
1415

1516
# Tear down test state: kill the background socat listener if one was
# started (socatpid stays 0 until the listener is launched), then delete
# both test network namespaces.
cleanup()
1617
{
18+
[ $socatpid -gt 0 ] && kill $socatpid
1719
ip netns del $ns1
1820
ip netns del $ns2
1921
}
2022

21-
iperf3 -v > /dev/null 2>&1
23+
socat -h > /dev/null 2>&1
2224
if [ $? -ne 0 ];then
23-
echo "SKIP: Could not run test without iperf3"
25+
echo "SKIP: Could not run test without socat"
2426
exit $ksft_skip
2527
fi
2628

@@ -60,8 +62,8 @@ ip netns exec $ns2 ip link set up dev veth2
6062
ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
6163

6264
# Create a server in one namespace
63-
ip netns exec $ns1 iperf3 -s > /dev/null 2>&1 &
64-
iperfs=$!
65+
ip netns exec $ns1 socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
66+
socatpid=$!
6567

6668
# Restrict source port to just one so we don't have to exhaust
6769
# all others.
@@ -83,17 +85,43 @@ sleep 1
8385
# ip daddr:dport will be rewritten to 192.168.1.1 5201
8486
# NAT must reallocate source port 10000 because
8587
# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
86-
echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443 >/dev/null
88+
echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
8789
ret=$?
8890

89-
kill $iperfs
90-
9191
# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
9292
if [ $ret -eq 0 ]; then
9393
echo "PASS: socat can connect via NAT'd address"
9494
else
9595
echo "FAIL: socat cannot connect via NAT'd address"
96-
exit 1
9796
fi
9897

99-
exit 0
98+
# Check source port clash resolution: ports 5202 and 5203 are both redirected to 5201.
99+
ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
100+
ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
101+
102+
sleep 5 | ip netns exec $ns2 socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
103+
cpid1=$!
104+
sleep 1
105+
106+
# if connect succeeds, client closes instantly due to EOF on stdin.
107+
# if connect hangs, it will time out after 5s.
108+
echo | ip netns exec $ns2 socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
109+
cpid2=$!
110+
111+
time_then=$(date +%s)
112+
wait $cpid2
113+
rv=$?
114+
time_now=$(date +%s)
115+
116+
# Check how much time has elapsed, expectation is for
117+
# 'cpid2' to connect and then exit (and no connect delay).
118+
delta=$((time_now - time_then))
119+
120+
if [ $delta -lt 2 -a $rv -eq 0 ]; then
121+
echo "PASS: could connect to service via redirected ports"
122+
else
123+
echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
124+
ret=1
125+
fi
126+
127+
exit $ret

0 commit comments

Comments
 (0)