|
| 1 | +tcp: avoid too many retransmit packets |
| 2 | + |
| 3 | +jira LE-3201 |
| 4 | +cve CVE-2024-41007 |
| 5 | +Rebuild_History Non-Buildable kernel-rt-4.18.0-553.22.1.rt7.363.el8_10 |
| 6 | +commit-author Eric Dumazet <edumazet@google.com> |
| 7 | +commit 97a9063518f198ec0adb2ecb89789de342bb8283 |
| 8 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 9 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 10 | +ciq/ciq_backports/kernel-rt-4.18.0-553.22.1.rt7.363.el8_10/97a90635.failed |
| 11 | + |
| 12 | +If a TCP socket is using TCP_USER_TIMEOUT, and the other peer |
| 13 | +retracted its window to zero, tcp_retransmit_timer() can |
| 14 | +retransmit a packet every two jiffies (2 ms for HZ=1000), |
| 15 | +for about 4 minutes after TCP_USER_TIMEOUT has 'expired'. |
| 16 | + |
| 17 | +The fix is to make sure tcp_rtx_probe0_timed_out() takes |
| 18 | +icsk->icsk_user_timeout into account. |
| 19 | + |
| 20 | +Before the blamed commit, the socket would not time out after |
| 21 | +icsk->icsk_user_timeout, but would use standard exponential |
| 22 | +backoff for the retransmits. |
| 23 | + |
| 24 | +Also worth noting that before commit e89688e3e978 ("net: tcp: |
| 25 | +fix unexcepted socket die when snd_wnd is 0"), the issue |
| 26 | +would last 2 minutes instead of 4. |
| 27 | + |
| 28 | +Fixes: b701a99e431d ("tcp: Add tcp_clamp_rto_to_user_timeout() helper to improve accuracy") |
| 29 | + Signed-off-by: Eric Dumazet <edumazet@google.com> |
| 30 | + Cc: Neal Cardwell <ncardwell@google.com> |
| 31 | + Reviewed-by: Jason Xing <kerneljasonxing@gmail.com> |
| 32 | + Reviewed-by: Jon Maxwell <jmaxwell37@gmail.com> |
| 33 | + Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> |
| 34 | +Link: https://patch.msgid.link/20240710001402.2758273-1-edumazet@google.com |
| 35 | + Signed-off-by: Jakub Kicinski <kuba@kernel.org> |
| 36 | +(cherry picked from commit 97a9063518f198ec0adb2ecb89789de342bb8283) |
| 37 | + Signed-off-by: Jonathan Maple <jmaple@ciq.com> |
| 38 | + |
| 39 | +# Conflicts: |
| 40 | +# net/ipv4/tcp_timer.c |
| 41 | +diff --cc net/ipv4/tcp_timer.c |
| 42 | +index c8ceb32ca8de,892c86657fbc..000000000000 |
| 43 | +--- a/net/ipv4/tcp_timer.c |
| 44 | ++++ b/net/ipv4/tcp_timer.c |
| 45 | +@@@ -420,13 -480,29 +420,35 @@@ static void tcp_fastopen_synack_timer(s |
| 46 | + } |
| 47 | + |
| 48 | + static bool tcp_rtx_probe0_timed_out(const struct sock *sk, |
| 49 | + - const struct sk_buff *skb, |
| 50 | + - u32 rtx_delta) |
| 51 | + + const struct sk_buff *skb) |
| 52 | + { |
| 53 | ++ const struct inet_connection_sock *icsk = inet_csk(sk); |
| 54 | ++ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); |
| 55 | + const struct tcp_sock *tp = tcp_sk(sk); |
| 56 | +++<<<<<<< HEAD |
| 57 | + + const int timeout = TCP_RTO_MAX * 2; |
| 58 | + + u32 rcv_delta, rtx_delta; |
| 59 | + + |
| 60 | + + rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp; |
| 61 | +++======= |
| 62 | ++ int timeout = TCP_RTO_MAX * 2; |
| 63 | ++ s32 rcv_delta; |
| 64 | ++ |
| 65 | ++ if (user_timeout) { |
| 66 | ++ /* If user application specified a TCP_USER_TIMEOUT, |
| 67 | ++ * it does not want win 0 packets to 'reset the timer' |
| 68 | ++ * while retransmits are not making progress. |
| 69 | ++ */ |
| 70 | ++ if (rtx_delta > user_timeout) |
| 71 | ++ return true; |
| 72 | ++ timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout)); |
| 73 | ++ } |
| 74 | ++ /* Note: timer interrupt might have been delayed by at least one jiffy, |
| 75 | ++ * and tp->rcv_tstamp might very well have been written recently. |
| 76 | ++ * rcv_delta can thus be negative. |
| 77 | ++ */ |
| 78 | ++ rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp; |
| 79 | +++>>>>>>> 97a9063518f1 (tcp: avoid too many retransmit packets) |
| 80 | + if (rcv_delta <= timeout) |
| 81 | + return false; |
| 82 | + |
| 83 | +* Unmerged path net/ipv4/tcp_timer.c |
0 commit comments