Skip to content

Commit 347abc5

Browse files
committed
netstacklat: convert bpf_printk to optional debug feature
For production we need a way to disable any use of bpf_printk. To track errors in production, introduce a map for counting these errors; it will be exposed as a Prometheus counter named netstacklat_errors_total. The new "dbg" macro handles/hides whether bpf_printk or counters are enabled. Signed-off-by: Jesper Dangaard Brouer <hawk@kernel.org>
1 parent 1443f7b commit 347abc5

File tree

2 files changed

+83
-11
lines changed

2 files changed

+83
-11
lines changed

examples/netstacklat.bpf.c

Lines changed: 65 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,57 @@ static void record_latency_since(ktime_t tstamp, const struct hist_key *key)
265265
}
266266
#endif /* !CONFIG_MAP_MACROS */
267267

268+
/* Debug facility to count errors.
 *
 * Each value is used as the key (index) into the netstacklat_errors_total
 * map, and is decoded to a label via the "type" static_map in
 * netstacklat.yaml, so the numeric values must stay in sync with that file.
 *
 * ERR_TYPES_COUNT must remain the last entry. MAX_ERROR_TYPES is derived
 * from it, so adding a new error type grows the map automatically instead of
 * the new type being silently accounted as ERR_UNKNOWN by record_errors().
 */
enum error_types {
	ERR_UNKNOWN = 0,
	ERR_sk_storage = 1,
	ERR_READ_TCP_rcv_wup = 2,
	ERR_READ_TCP_rcv_wnd = 3,
	ERR_READ_TCP_rcv_nxt = 4,
	ERR_READ_TCP_last_skb_cb = 5,
	ERR_READ_TCP_cp_seq = 6,
	ERR_READ_TCP_rcv_ooopack = 7,
	ERR_TYPES_COUNT, /* Number of error types; keep last */
};
#define MAX_ERROR_TYPES ERR_TYPES_COUNT
280+
/* Per-CPU array of error counters: MAX_ERROR_TYPES entries, u32 key and
 * u64 value. record_errors() uses the error type (enum error_types) as the
 * key when incrementing a counter.
 */
struct {
281+
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
282+
__uint(max_entries, MAX_ERROR_TYPES);
283+
__type(key, u32);
284+
__type(value, u64);
285+
} netstacklat_errors_total SEC(".maps");
286+
287+
/* This provides an easy way to disable the debug feature for errors.
288+
 * Disabling this reduces BPF code size.
289+
 */
290+
/* When defined, record_errors() increments netstacklat_errors_total. */
#define CONFIG_TRACK_ERRORS 1
291+
/* #define CONFIG_PRINT_ERRORS 1 */
292+
/* When defined, my_printk() expands to bpf_printk(); undefined here, so
 * error messages are not printed.
 */
#undef CONFIG_PRINT_ERRORS
293+
294+
/* Count one occurrence of error type @err in netstacklat_errors_total.
 *
 * @err: expected to be one of enum error_types. Any value that is
 *       >= MAX_ERROR_TYPES is accounted under ERR_UNKNOWN.
 *
 * Does nothing when CONFIG_TRACK_ERRORS is not defined.
 *
 * Declared static inline, matching the other helpers in this file (e.g.
 * filter_nth_packet()), so the function has internal linkage.
 */
static inline void record_errors(u32 err)
{
#ifdef CONFIG_TRACK_ERRORS
	/* Clamp out-of-range values so the key is always a valid map index */
	u32 key = err < MAX_ERROR_TYPES ? err : ERR_UNKNOWN;

	increment_map_nosync(&netstacklat_errors_total, &key, 1);
#else
	(void)err; /* Avoid unused-parameter warning when tracking is off */
#endif /* CONFIG_TRACK_ERRORS */
}
305+
306+
/* my_printk(): wrapper around bpf_printk() that can be compiled out.
 *
 * With CONFIG_PRINT_ERRORS defined it forwards to bpf_printk(). Otherwise it
 * expands to ((void)0): the arguments are not evaluated, and the macro is
 * still a valid expression, so it can be used in the same syntactic
 * positions in both configurations.
 */
#ifdef CONFIG_PRINT_ERRORS
#define my_printk(fmt, ...) bpf_printk(fmt, ##__VA_ARGS__)
#else /* !CONFIG_PRINT_ERRORS */
#define my_printk(fmt, ...) ((void)0)
#endif
311+
312+
/* Debug macro that can be disabled at compile time.
 *
 * @err_nr: error type passed to record_errors()
 * @fmt:    format string (plus optional arguments) passed to my_printk()
 *
 * Whether the error is counted and/or printed is decided inside
 * record_errors() and my_printk() respectively.
 */
#define dbg(err_nr, fmt, ...)                  \
	({                                     \
		record_errors(err_nr);         \
		my_printk(fmt, ##__VA_ARGS__); \
	})
318+
268319
static inline bool filter_nth_packet(const enum netstacklat_hook hook)
269320
{
270321
u32 key = hook;
@@ -516,13 +567,15 @@ static int get_current_rcv_wnd_seq(struct tcp_sock *tp, u32 rcv_nxt, u32 *seq)
516567

517568
err = bpf_core_read(&rcv_wup, sizeof(rcv_wup), &tp->rcv_wup);
518569
if (err) {
519-
bpf_printk("failed to read tcp_sock->rcv_wup, err=%d", err);
570+
dbg(ERR_READ_TCP_rcv_wup,
571+
"failed to read tcp_sock->rcv_wup, err=%d", err);
520572
goto exit;
521573
}
522574

523575
err = bpf_core_read(&rcv_wnd, sizeof(rcv_wnd), &tp->rcv_wnd);
524576
if (err) {
525-
bpf_printk("failed to read tcp_sock->rcv_wnd, err=%d", err);
577+
dbg(ERR_READ_TCP_rcv_wnd,
578+
"failed to read tcp_sock->rcv_wnd, err=%d", err);
526579
goto exit;
527580
}
528581

@@ -543,7 +596,8 @@ static int current_max_possible_ooo_seq(struct tcp_sock *tp, u32 *seq)
543596

544597
err = bpf_core_read(&rcv_nxt, sizeof(rcv_nxt), &tp->rcv_nxt);
545598
if (err) {
546-
bpf_printk("failed reading tcp_sock->rcv_nxt, err=%d", err);
599+
dbg(ERR_READ_TCP_rcv_nxt,
600+
"failed reading tcp_sock->rcv_nxt, err=%d", err);
547601
goto exit;
548602
}
549603

@@ -562,9 +616,8 @@ static int current_max_possible_ooo_seq(struct tcp_sock *tp, u32 *seq)
562616
*/
563617
err = BPF_CORE_READ_INTO(&cb, tp, ooo_last_skb, cb);
564618
if (err) {
565-
bpf_printk(
566-
"failed to read tcp_sock->ooo_last_skb->cb, err=%d",
567-
err);
619+
dbg(ERR_READ_TCP_last_skb_cb,
620+
"failed to read tcp_sock->ooo_last_skb->cb, err=%d", err);
568621
goto exit;
569622
}
570623

@@ -598,7 +651,8 @@ static bool tcp_read_in_ooo_range(struct tcp_sock *tp,
598651

599652
err = bpf_core_read(&read_seq, sizeof(read_seq), &tp->copied_seq);
600653
if (err) {
601-
bpf_printk("failed to read tcp_sock->copied_seq, err=%d", err);
654+
dbg(ERR_READ_TCP_cp_seq,
655+
"failed to read tcp_sock->copied_seq, err=%d", err);
602656
return true; // Assume we may be in ooo-range
603657
}
604658

@@ -619,8 +673,8 @@ static bool tcp_read_maybe_holblocked(struct sock *sk)
619673

620674
err = bpf_core_read(&n_ooopkts, sizeof(n_ooopkts), &tp->rcv_ooopack);
621675
if (err) {
622-
bpf_printk("failed to read tcp_sock->rcv_ooopack, err=%d\n",
623-
err);
676+
dbg(ERR_READ_TCP_rcv_ooopack,
677+
"failed to read tcp_sock->rcv_ooopack, err=%d\n", err);
624678
return true; // Assume we may be in ooo-range
625679
}
626680

@@ -630,8 +684,8 @@ static bool tcp_read_maybe_holblocked(struct sock *sk)
630684
ooo_range = bpf_sk_storage_get(&netstack_tcp_ooo_range, sk, NULL,
631685
BPF_SK_STORAGE_GET_F_CREATE);
632686
if (!ooo_range) {
633-
bpf_printk(
634-
"failed getting ooo-range socket storage for tcp socket");
687+
dbg(ERR_sk_storage,
688+
"failed getting ooo-range socket storage for tcp socket");
635689
return true; // Assume we may be in ooo-range
636690
}
637691

examples/netstacklat.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,24 @@ metrics:
3737
size: 2
3838
decoders:
3939
- name: uint
40+
counters:
41+
- name: netstacklat_errors_total
42+
help: Counter for bpf_core_read errors in code (can be disabled in code)
43+
labels:
44+
- name: type
45+
size: 4
46+
decoders:
47+
- name: uint
48+
- name: static_map
49+
static_map:
50+
0: unknown
51+
1: err_sk_storage
52+
2: err_read_tcp_rcv_wup
53+
3: err_read_tcp_rcv_wnd
54+
4: err_read_tcp_rcv_nxt
55+
5: err_read_tcp_last_skb_cb
56+
6: err_read_tcp_cp_seq
57+
7: err_read_tcp_rcv_ooopack
4058

4159
# Remember to update #define N_CGROUPS in code when adding more matches
4260
cgroup_id_map:

0 commit comments

Comments
 (0)