Skip to content

Commit fca192b

Browse files
committed
WIP: netstacklat: Add sanity checks for TCP HoL blocking filter
Something currently seems to be off with the ooo_last_skb->cb.end_seq and receive-window calculation: limiting ooo_last_skb->cb.end_seq to the current receive window seems to break the filter (it then includes what appear to be HoL-blocked TCP reads).

Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
1 parent dff6bc5 commit fca192b

File tree

1 file changed

+97
-32
lines changed

1 file changed

+97
-32
lines changed

netstacklat/netstacklat.bpf.c

Lines changed: 97 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ struct sk_buff___old {
4646
/* Per-socket tracking state for the TCP HoL-blocking filter: records the
 * range of sequence numbers that may have been delayed by out-of-order
 * segments, plus the last sanity-checked values of rcv_nxt/copied_seq.
 */
struct tcp_sock_ooo_range {
	u32 prev_n_ooopkts;  /* last observed tp->rcv_ooopack count */
	u32 ooo_seq_end;     /* max seq possibly covered by ooo-segments */
	u32 last_rcv_nxt;    /* last validated tp->rcv_nxt (sanity check) */
	u32 last_copied_seq; /* last validated tp->copied_seq (sanity check) */
	/* indicates if ooo_seq_end is still valid (as 0 can be valid seq) */
	bool active;
};
@@ -330,10 +332,43 @@ static bool filter_min_sockqueue_len(struct sock *sk)
330332
return false;
331333
}
332334

333-
static int current_max_possible_ooo_seq(struct tcp_sock *tp, u32 *seq)
335+
/* Get the current receive window end sequence for tp
336+
* In the kernel receive window checks are done against
337+
* tp->rcv_nxt + tcp_receive_window(tp). This function should give a compareable
338+
* result, i.e. rcv_wup + rcv_wnd or rcv_nxt, whichever is higher
339+
*/
340+
static int get_current_rcv_wnd_seq(struct tcp_sock *tp, u32 rcv_nxt, u32 *seq)
341+
{
342+
u32 rcv_wup, rcv_wnd, window = 0;
343+
int err;
344+
345+
err = bpf_core_read(&rcv_wup, sizeof(rcv_wup), &tp->rcv_wup);
346+
if (err) {
347+
bpf_printk("failed to read tcp_sock->rcv_wup, err=%d", err);
348+
goto exit;
349+
}
350+
351+
err = bpf_core_read(&rcv_wnd, sizeof(rcv_wnd), &tp->rcv_wnd);
352+
if (err) {
353+
bpf_printk("failed to read tcp_sock->rcv_wnd, err=%d", err);
354+
goto exit;
355+
}
356+
357+
window = rcv_wup + rcv_nxt;
358+
if (u32_lt(window, rcv_nxt))
359+
window = rcv_nxt;
360+
361+
exit:
362+
*seq = window;
363+
return err;
364+
}
365+
366+
static int current_max_possible_ooo_seq(struct tcp_sock *tp, u32 rcv_nxt,
367+
struct tcp_sock_ooo_range *ooo_range,
368+
u32 *seq)
334369
{
370+
u32 cur_rcv_window, max_seq = 0;
335371
struct tcp_skb_cb cb;
336-
u32 max_seq = 0;
337372
int err = 0;
338373

339374
if (BPF_CORE_READ(tp, out_of_order_queue.rb_node) == NULL) {
@@ -342,44 +377,45 @@ static int current_max_possible_ooo_seq(struct tcp_sock *tp, u32 *seq)
342377
* receive queue. Current rcv_nxt must therefore be ahead
343378
* of all ooo-segments that have arrived until now.
344379
*/
345-
err = bpf_core_read(&max_seq, sizeof(max_seq), &tp->rcv_nxt);
346-
if (err)
347-
bpf_printk("failed to read tcp_sock->rcv_nxt, err=%d",
348-
err);
349-
} else {
350-
/*
351-
* Some ooo-segments currently in ooo-queue
352-
* Max out-of-order seq is given by the seq_end of the tail
353-
* skb in the ooo-queue.
354-
*/
355-
err = BPF_CORE_READ_INTO(&cb, tp, ooo_last_skb, cb);
356-
if (err)
357-
bpf_printk(
358-
"failed to read tcp_sock->ooo_last_skb->cb, err=%d",
359-
err);
360-
max_seq = cb.end_seq;
380+
max_seq = rcv_nxt;
381+
goto exit;
382+
}
383+
384+
/*
385+
* Some ooo-segments currently in ooo-queue
386+
* Max out-of-order seq is given by the seq_end of the tail
387+
* skb in the ooo-queue.
388+
*/
389+
err = BPF_CORE_READ_INTO(&cb, tp, ooo_last_skb, cb);
390+
if (err) {
391+
bpf_printk("failed to read tcp_sock->ooo_last_skb->cb, err=%d",
392+
err);
393+
goto exit;
361394
}
362395

396+
// Sanity check - ooo_last_skb->cb.end_seq within the receive window?
397+
err = get_current_rcv_wnd_seq(tp, rcv_nxt, &cur_rcv_window);
398+
if (err)
399+
goto exit;
400+
401+
if (cb.end_seq == 0 || u32_lt(cur_rcv_window, cb.end_seq))
402+
max_seq = cur_rcv_window;
403+
else
404+
max_seq = cb.end_seq;
405+
max_seq = cb.end_seq;
406+
407+
exit:
363408
*seq = max_seq;
364409
return err;
365410
}
366411

367-
static bool tcp_read_in_ooo_range(struct tcp_sock *tp,
412+
static bool tcp_read_in_ooo_range(struct tcp_sock *tp, u32 copied_seq,
368413
struct tcp_sock_ooo_range *ooo_range)
369414
{
370-
u32 read_seq;
371-
int err;
372-
373415
if (!ooo_range->active)
374416
return false;
375417

376-
err = bpf_core_read(&read_seq, sizeof(read_seq), &tp->copied_seq);
377-
if (err) {
378-
bpf_printk("failed to read tcp_sock->copied_seq, err=%d", err);
379-
return true; // Assume we may be in ooo-range
380-
}
381-
382-
if (u32_lt(ooo_range->ooo_seq_end, read_seq)) {
418+
if (u32_lt(ooo_range->ooo_seq_end, copied_seq)) {
383419
ooo_range->active = false;
384420
return false;
385421
} else {
@@ -389,9 +425,10 @@ static bool tcp_read_in_ooo_range(struct tcp_sock *tp,
389425

390426
static bool tcp_read_maybe_holblocked(struct sock *sk)
391427
{
428+
u32 n_ooopkts, rcv_nxt, copied_seq, nxt_seq;
392429
struct tcp_sock_ooo_range *ooo_range;
393430
struct tcp_sock *tp = tcp_sk(sk);
394-
u32 n_ooopkts, nxt_seq;
431+
bool invalid_values = false;
395432
int err;
396433

397434
err = bpf_core_read(&n_ooopkts, sizeof(n_ooopkts), &tp->rcv_ooopack);
@@ -412,18 +449,46 @@ static bool tcp_read_maybe_holblocked(struct sock *sk)
412449
return true; // Assume we may be in ooo-range
413450
}
414451

452+
/* rcv_nxt and copied_seq may not be needed, but to ensure we always
453+
* update our tracked state for them, read, sanity check and update
454+
* their values here.
455+
*/
456+
err = bpf_core_read(&rcv_nxt, sizeof(rcv_nxt), &tp->rcv_nxt);
457+
if (err || (ooo_range->last_rcv_nxt &&
458+
u32_lt(rcv_nxt, ooo_range->last_rcv_nxt))) {
459+
bpf_printk("failed to read valid tcp_sock->rcv_nxt, err=%d",
460+
err);
461+
invalid_values = true;
462+
} else {
463+
ooo_range->last_rcv_nxt = rcv_nxt;
464+
}
465+
466+
err = bpf_core_read(&copied_seq, sizeof(copied_seq), &tp->copied_seq);
467+
if (err || (ooo_range->last_copied_seq &&
468+
u32_lt(copied_seq, ooo_range->last_copied_seq))) {
469+
bpf_printk("failed to read valid tcp_sock->copied_seq, err=%d",
470+
err);
471+
invalid_values = true;
472+
} else {
473+
ooo_range->last_copied_seq = copied_seq;
474+
}
475+
476+
if (invalid_values)
477+
return true; // Assume we may be in ooo-range
478+
415479
// Increase in ooo-packets since last - figure out next safe seq
416480
if (n_ooopkts > ooo_range->prev_n_ooopkts) {
417481
ooo_range->prev_n_ooopkts = n_ooopkts;
418-
err = current_max_possible_ooo_seq(tp, &nxt_seq);
482+
err = current_max_possible_ooo_seq(tp, rcv_nxt, ooo_range,
483+
&nxt_seq);
419484
if (!err) {
420485
ooo_range->ooo_seq_end = nxt_seq;
421486
ooo_range->active = true;
422487
}
423488
return true;
424489
}
425490

426-
return tcp_read_in_ooo_range(tp, ooo_range);
491+
return tcp_read_in_ooo_range(tp, copied_seq, ooo_range);
427492
}
428493

429494
static void record_socket_latency(struct sock *sk, struct sk_buff *skb,

0 commit comments

Comments
 (0)