Skip to content

Commit 7701794

Browse files
author
CKI KWF Bot
committed
Merge: tcp: allow to control max RTO
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-10/-/merge_requests/1480 Adding support for controlling the TCP max RTO, through a sysctl or a sockopt. Tested using a packetdrill script (see the linked issue). JIRA: https://issues.redhat.com/browse/RHEL-115393 Signed-off-by: Antoine Tenart <atenart@redhat.com> Approved-by: Paolo Abeni <pabeni@redhat.com> Approved-by: Hangbin Liu <haliu@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: CKI GitLab Kmaint Pipeline Bot <26919896-cki-kmaint-pipeline-bot@users.noreply.gitlab.com>
2 parents 9e4302f + 28576ae commit 7701794

File tree

14 files changed

+93
-49
lines changed

14 files changed

+93
-49
lines changed

Documentation/networking/ip-sysctl.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,8 @@ tcp_retries2 - INTEGER
705705
seconds and is a lower bound for the effective timeout.
706706
TCP will effectively time out at the first RTO which exceeds the
707707
hypothetical timeout.
708+
If tcp_rto_max_ms is decreased, it is recommended to also
709+
change tcp_retries2.
708710

709711
RFC 1122 recommends at least 100 seconds for the timeout,
710712
which corresponds to a value of at least 8.
@@ -1223,6 +1225,17 @@ tcp_rto_min_us - INTEGER
12231225

12241226
Default: 200000
12251227

1228+
tcp_rto_max_ms - INTEGER
1229+
Maximum TCP retransmission timeout (in ms).
1230+
Note that TCP_RTO_MAX_MS socket option has higher precedence.
1231+
1232+
When changing tcp_rto_max_ms, it is important to understand
1233+
that tcp_retries2 might need a change.
1234+
1235+
Possible Values: 1,000 - 120,000
1236+
1237+
Default: 120,000
1238+
12261239
UDP variables
12271240
=============
12281241

Documentation/networking/net_cachelines/inet_connection_sock.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ struct_timer_list icsk_retransmit_timer read_mostly -
1616
struct_timer_list icsk_delack_timer read_mostly - inet_csk_reset_xmit_timer,tcp_connect
1717
u32 icsk_rto read_write - tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one
1818
u32 icsk_rto_min - -
19+
u32 icsk_rto_max read_mostly - tcp_reset_xmit_timer
1920
u32 icsk_delack_max - -
2021
u32 icsk_pmtu_cookie read_write - tcp_sync_mss,tcp_current_mss,tcp_send_syn_data,tcp_connect_init,tcp_connect
2122
struct_tcp_congestion_ops icsk_ca_ops read_write - tcp_cwnd_validate,tcp_tso_segs,tcp_ca_dst_init,tcp_connect_init,tcp_connect,tcp_write_xmit

Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ u8 sysctl_tcp_sack -
8484
u8 sysctl_tcp_window_scaling - - tcp_syn_options,tcp_parse_options
8585
u8 sysctl_tcp_timestamps
8686
u8 sysctl_tcp_early_retrans read_mostly - tcp_schedule_loss_probe(tcp_write_xmit)
87+
int sysctl_tcp_rto_max_ms - -
8788
u8 sysctl_tcp_recovery - - tcp_fastretrans_alert
8889
u8 sysctl_tcp_thin_linear_timeouts - - tcp_retrans_timer(on_thin_streams)
8990
u8 sysctl_tcp_slow_start_after_idle - - unlikely(tcp_cwnd_validate-network-not-starved)

include/net/inet_connection_sock.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ struct inet_connection_sock {
9090
struct timer_list icsk_delack_timer;
9191
__u32 icsk_rto;
9292
__u32 icsk_rto_min;
93+
u32 icsk_rto_max;
9394
__u32 icsk_delack_max;
9495
__u32 icsk_pmtu_cookie;
9596
const struct tcp_congestion_ops *icsk_ca_ops;

include/net/netns/ipv4.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ struct netns_ipv4 {
181181
u8 sysctl_tcp_window_scaling;
182182
u8 sysctl_tcp_timestamps;
183183
int sysctl_tcp_rto_min_us;
184+
int sysctl_tcp_rto_max_ms;
184185
u8 sysctl_tcp_recovery;
185186
u8 sysctl_tcp_thin_linear_timeouts;
186187
u8 sysctl_tcp_slow_start_after_idle;

include/net/tcp.h

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,9 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
144144
#define TCP_DELACK_MIN 4U
145145
#define TCP_ATO_MIN 4U
146146
#endif
147-
#define TCP_RTO_MAX ((unsigned)(120*HZ))
148-
#define TCP_RTO_MIN ((unsigned)(HZ/5))
147+
#define TCP_RTO_MAX_SEC 120
148+
#define TCP_RTO_MAX ((unsigned)(TCP_RTO_MAX_SEC * HZ))
149+
#define TCP_RTO_MIN ((unsigned)(HZ / 5))
149150
#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */
150151

151152
#define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */
@@ -754,10 +755,14 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu);
754755
int tcp_mss_to_mtu(struct sock *sk, int mss);
755756
void tcp_mtup_init(struct sock *sk);
756757

758+
static inline unsigned int tcp_rto_max(const struct sock *sk)
759+
{
760+
return READ_ONCE(inet_csk(sk)->icsk_rto_max);
761+
}
762+
757763
static inline void tcp_bound_rto(struct sock *sk)
758764
{
759-
if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
760-
inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
765+
inet_csk(sk)->icsk_rto = min(inet_csk(sk)->icsk_rto, tcp_rto_max(sk));
761766
}
762767

763768
static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
@@ -1440,10 +1445,12 @@ static inline unsigned long tcp_pacing_delay(const struct sock *sk)
14401445
static inline void tcp_reset_xmit_timer(struct sock *sk,
14411446
const int what,
14421447
unsigned long when,
1443-
const unsigned long max_when)
1448+
bool pace_delay)
14441449
{
1445-
inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
1446-
max_when);
1450+
if (pace_delay)
1451+
when += tcp_pacing_delay(sk);
1452+
inet_csk_reset_xmit_timer(sk, what, when,
1453+
tcp_rto_max(sk));
14471454
}
14481455

14491456
/* Something is really bad, we could not queue an additional packet,
@@ -1472,7 +1479,7 @@ static inline void tcp_check_probe_timer(struct sock *sk)
14721479
{
14731480
if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
14741481
tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
1475-
tcp_probe0_base(sk), TCP_RTO_MAX);
1482+
tcp_probe0_base(sk), true);
14761483
}
14771484

14781485
static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)

include/uapi/linux/tcp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ enum {
136136
#define TCP_AO_REPAIR 42 /* Get/Set SNEs and ISNs */
137137

138138
#define TCP_IS_MPTCP 43 /* Is MPTCP being used? */
139+
#define TCP_RTO_MAX_MS 44 /* max rto time in ms */
139140

140141
#define TCP_REPAIR_ON 1
141142
#define TCP_REPAIR_OFF 0

net/ipv4/sysctl_net_ipv4.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ static int tcp_adv_win_scale_max = 31;
2828
static int tcp_app_win_max = 31;
2929
static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
3030
static int tcp_min_snd_mss_max = 65535;
31+
static int tcp_rto_max_max = TCP_RTO_MAX_SEC * MSEC_PER_SEC;
3132
static int ip_privileged_port_min;
3233
static int ip_privileged_port_max = 65535;
3334
static int ip_ttl_min = 1;
@@ -1573,6 +1574,15 @@ static struct ctl_table ipv4_net_table[] = {
15731574
.proc_handler = proc_dointvec_minmax,
15741575
.extra1 = SYSCTL_ONE,
15751576
},
1577+
{
1578+
.procname = "tcp_rto_max_ms",
1579+
.data = &init_net.ipv4.sysctl_tcp_rto_max_ms,
1580+
.maxlen = sizeof(int),
1581+
.mode = 0644,
1582+
.proc_handler = proc_dointvec_minmax,
1583+
.extra1 = SYSCTL_ONE_THOUSAND,
1584+
.extra2 = &tcp_rto_max_max,
1585+
},
15761586
};
15771587

15781588
static __net_init int ipv4_sysctl_init_net(struct net *net)

net/ipv4/tcp.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ void tcp_init_sock(struct sock *sk)
423423
{
424424
struct inet_connection_sock *icsk = inet_csk(sk);
425425
struct tcp_sock *tp = tcp_sk(sk);
426-
int rto_min_us;
426+
int rto_min_us, rto_max_ms;
427427

428428
tp->out_of_order_queue = RB_ROOT;
429429
sk->tcp_rtx_queue = RB_ROOT;
@@ -432,6 +432,10 @@ void tcp_init_sock(struct sock *sk)
432432
INIT_LIST_HEAD(&tp->tsorted_sent_queue);
433433

434434
icsk->icsk_rto = TCP_TIMEOUT_INIT;
435+
436+
rto_max_ms = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_max_ms);
437+
icsk->icsk_rto_max = msecs_to_jiffies(rto_max_ms);
438+
435439
rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us);
436440
icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us);
437441
icsk->icsk_delack_max = TCP_DELACK_MAX;
@@ -3800,6 +3804,11 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
38003804
secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
38013805
TCP_RTO_MAX / HZ));
38023806
return 0;
3807+
case TCP_RTO_MAX_MS:
3808+
if (val < MSEC_PER_SEC || val > TCP_RTO_MAX_SEC * MSEC_PER_SEC)
3809+
return -EINVAL;
3810+
WRITE_ONCE(inet_csk(sk)->icsk_rto_max, msecs_to_jiffies(val));
3811+
return 0;
38033812
}
38043813

38053814
sockopt_lock_sock(sk);
@@ -4636,6 +4645,9 @@ int do_tcp_getsockopt(struct sock *sk, int level,
46364645
case TCP_IS_MPTCP:
46374646
val = 0;
46384647
break;
4648+
case TCP_RTO_MAX_MS:
4649+
val = jiffies_to_msecs(tcp_rto_max(sk));
4650+
break;
46394651
default:
46404652
return -ENOPROTOOPT;
46414653
}

net/ipv4/tcp_fastopen.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -274,8 +274,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
274274
* because it's been added to the accept queue directly.
275275
*/
276276
req->timeout = tcp_timeout_init(child);
277-
inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
278-
req->timeout, TCP_RTO_MAX);
277+
tcp_reset_xmit_timer(child, ICSK_TIME_RETRANS,
278+
req->timeout, false);
279279

280280
refcount_set(&req->rsk_refcnt, 2);
281281

0 commit comments

Comments
 (0)