@@ -46,6 +46,8 @@ struct sk_buff___old {
 struct tcp_sock_ooo_range {
 	u32 prev_n_ooopkts;
 	u32 ooo_seq_end;
+	u32 last_rcv_nxt;
+	u32 last_copied_seq;
 	/* indicates if ooo_seq_end is still valid (as 0 can be valid seq) */
 	bool active;
 };
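The two new fields extend the per-socket state that tcp_read_maybe_holblocked() consults on every read. How that state is stored is outside these hunks; below is a minimal sketch of one way to hold and look it up, assuming a libbpf BTF-style map definition with vmlinux.h and bpf/bpf_helpers.h already included. The map name tcp_ooo_state, the key choice (the socket pointer), and lookup_ooo_range() are illustrative, not taken from this patch:

```c
/* Hypothetical per-socket state map; the real program may key and size
 * this differently (e.g. by socket cookie). */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 16384);
	__type(key, __u64);
	__type(value, struct tcp_sock_ooo_range);
} tcp_ooo_state SEC(".maps");

static struct tcp_sock_ooo_range *lookup_ooo_range(struct sock *sk)
{
	__u64 key = (__u64)(unsigned long)sk; /* key on the socket pointer */

	return bpf_map_lookup_elem(&tcp_ooo_state, &key);
}
```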
@@ -330,10 +332,43 @@ static bool filter_min_sockqueue_len(struct sock *sk)
 	return false;
 }
 
-static int current_max_possible_ooo_seq(struct tcp_sock *tp, u32 *seq)
+/* Get the current receive window end sequence for tp.
+ * In the kernel, receive window checks are done against
+ * tp->rcv_nxt + tcp_receive_window(tp). This function should give a comparable
+ * result, i.e. rcv_wup + rcv_wnd or rcv_nxt, whichever is higher.
+ */
+static int get_current_rcv_wnd_seq(struct tcp_sock *tp, u32 rcv_nxt, u32 *seq)
+{
+	u32 rcv_wup, rcv_wnd, window = 0;
+	int err;
+
+	err = bpf_core_read(&rcv_wup, sizeof(rcv_wup), &tp->rcv_wup);
+	if (err) {
+		bpf_printk("failed to read tcp_sock->rcv_wup, err=%d", err);
+		goto exit;
+	}
+
+	err = bpf_core_read(&rcv_wnd, sizeof(rcv_wnd), &tp->rcv_wnd);
+	if (err) {
+		bpf_printk("failed to read tcp_sock->rcv_wnd, err=%d", err);
+		goto exit;
+	}
+
+	window = rcv_wup + rcv_wnd;
+	if (u32_lt(window, rcv_nxt))
+		window = rcv_nxt;
+
+exit:
+	*seq = window;
+	return err;
+}
+
+static int current_max_possible_ooo_seq(struct tcp_sock *tp, u32 rcv_nxt,
+					struct tcp_sock_ooo_range *ooo_range,
+					u32 *seq)
 {
+	u32 cur_rcv_window, max_seq = 0;
 	struct tcp_skb_cb cb;
-	u32 max_seq = 0;
 	int err = 0;
 
 	if (BPF_CORE_READ(tp, out_of_order_queue.rb_node) == NULL) {
@@ -342,44 +377,45 @@ static int current_max_possible_ooo_seq(struct tcp_sock *tp, u32 *seq)
 		 * receive queue. Current rcv_nxt must therefore be ahead
 		 * of all ooo-segments that have arrived until now.
 		 */
-		err = bpf_core_read(&max_seq, sizeof(max_seq), &tp->rcv_nxt);
-		if (err)
-			bpf_printk("failed to read tcp_sock->rcv_nxt, err=%d",
-				   err);
-	} else {
-		/*
-		 * Some ooo-segments currently in ooo-queue
-		 * Max out-of-order seq is given by the seq_end of the tail
-		 * skb in the ooo-queue.
-		 */
-		err = BPF_CORE_READ_INTO(&cb, tp, ooo_last_skb, cb);
-		if (err)
-			bpf_printk(
-				"failed to read tcp_sock->ooo_last_skb->cb, err=%d",
-				err);
-		max_seq = cb.end_seq;
+		max_seq = rcv_nxt;
+		goto exit;
+	}
+
+	/*
+	 * Some ooo-segments currently in ooo-queue
+	 * Max out-of-order seq is given by the seq_end of the tail
+	 * skb in the ooo-queue.
+	 */
+	err = BPF_CORE_READ_INTO(&cb, tp, ooo_last_skb, cb);
+	if (err) {
+		bpf_printk("failed to read tcp_sock->ooo_last_skb->cb, err=%d",
+			   err);
+		goto exit;
 	}
 
+	// Sanity check - ooo_last_skb->cb.end_seq within the receive window?
+	err = get_current_rcv_wnd_seq(tp, rcv_nxt, &cur_rcv_window);
+	if (err)
+		goto exit;
+
+	if (cb.end_seq == 0 || u32_lt(cur_rcv_window, cb.end_seq))
+		max_seq = cur_rcv_window;
+	else
+		max_seq = cb.end_seq;
+
+exit:
 	*seq = max_seq;
 	return err;
 }
 
-static bool tcp_read_in_ooo_range(struct tcp_sock *tp,
+static bool tcp_read_in_ooo_range(struct tcp_sock *tp, u32 copied_seq,
 				  struct tcp_sock_ooo_range *ooo_range)
 {
-	u32 read_seq;
-	int err;
-
 	if (!ooo_range->active)
 		return false;
 
-	err = bpf_core_read(&read_seq, sizeof(read_seq), &tp->copied_seq);
-	if (err) {
-		bpf_printk("failed to read tcp_sock->copied_seq, err=%d", err);
-		return true; // Assume we may be in ooo-range
-	}
-
-	if (u32_lt(ooo_range->ooo_seq_end, read_seq)) {
+	if (u32_lt(ooo_range->ooo_seq_end, copied_seq)) {
 		ooo_range->active = false;
 		return false;
 	} else {
@@ -389,9 +425,10 @@ static bool tcp_read_in_ooo_range(struct tcp_sock *tp,
 
 static bool tcp_read_maybe_holblocked(struct sock *sk)
 {
+	u32 n_ooopkts, rcv_nxt, copied_seq, nxt_seq;
 	struct tcp_sock_ooo_range *ooo_range;
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 n_ooopkts, nxt_seq;
+	bool invalid_values = false;
 	int err;
 
 	err = bpf_core_read(&n_ooopkts, sizeof(n_ooopkts), &tp->rcv_ooopack);
@@ -412,18 +449,46 @@ static bool tcp_read_maybe_holblocked(struct sock *sk)
 		return true; // Assume we may be in ooo-range
 	}
 
+	/* rcv_nxt and copied_seq may not be needed, but to ensure we always
+	 * update our tracked state for them, read, sanity check and update
+	 * their values here.
+	 */
+	err = bpf_core_read(&rcv_nxt, sizeof(rcv_nxt), &tp->rcv_nxt);
+	if (err || (ooo_range->last_rcv_nxt &&
+		    u32_lt(rcv_nxt, ooo_range->last_rcv_nxt))) {
+		bpf_printk("failed to read valid tcp_sock->rcv_nxt, err=%d",
+			   err);
+		invalid_values = true;
+	} else {
+		ooo_range->last_rcv_nxt = rcv_nxt;
+	}
+
+	err = bpf_core_read(&copied_seq, sizeof(copied_seq), &tp->copied_seq);
+	if (err || (ooo_range->last_copied_seq &&
+		    u32_lt(copied_seq, ooo_range->last_copied_seq))) {
+		bpf_printk("failed to read valid tcp_sock->copied_seq, err=%d",
+			   err);
+		invalid_values = true;
+	} else {
+		ooo_range->last_copied_seq = copied_seq;
+	}
+
+	if (invalid_values)
+		return true; // Assume we may be in ooo-range
+
 	// Increase in ooo-packets since last - figure out next safe seq
 	if (n_ooopkts > ooo_range->prev_n_ooopkts) {
 		ooo_range->prev_n_ooopkts = n_ooopkts;
-		err = current_max_possible_ooo_seq(tp, &nxt_seq);
+		err = current_max_possible_ooo_seq(tp, rcv_nxt, ooo_range,
+						   &nxt_seq);
 		if (!err) {
 			ooo_range->ooo_seq_end = nxt_seq;
 			ooo_range->active = true;
 		}
 		return true;
 	}
 
-	return tcp_read_in_ooo_range(tp, ooo_range);
+	return tcp_read_in_ooo_range(tp, copied_seq, ooo_range);
 }
 
 static void record_socket_latency(struct sock *sk, struct sk_buff *skb,
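Note: the sequence comparisons in this patch rely on a wraparound-safe u32_lt() helper defined elsewhere in the file and not shown in these hunks. A minimal sketch of what such a helper typically looks like, mirroring the kernel's before() macro from include/net/tcp.h (the exact definition used by this program is an assumption):

```c
/* Wraparound-safe "a comes before b" for 32-bit TCP sequence numbers.
 * Casting the difference to a signed 32-bit value keeps the comparison
 * correct across sequence-number wraparound, like the kernel's before(). */
static __always_inline bool u32_lt(__u32 a, __u32 b)
{
	return (__s32)(a - b) < 0;
}
```

With this definition, u32_lt(0xffffffff, 1) is true even though the raw values compare the other way, which is what the rcv_nxt/copied_seq monotonicity checks above need near a sequence-number wrap.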