Skip to content

Commit 4b403b2

Browse files
authored
Merge pull request kmesh-net#887 from skwwwwww/performance-monitoring
Add Performance Monitoring
2 parents 52d88d7 + efb1588 commit 4b403b2

File tree

18 files changed

+1669
-13
lines changed

18 files changed

+1669
-13
lines changed
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2+
/* Copyright Authors of Kmesh */
3+
4+
#ifndef __KMESH_BPF_PERFORMANCE_MONITOR_H__
5+
#define __KMESH_BPF_PERFORMANCE_MONITOR_H__
6+
7+
#define PERF_MONITOR 0
8+
9+
#include "bpf_common.h"
10+
11+
/*
 * Identifiers for the operations whose duration can be measured.
 * One of these values is passed as `operation_type` to
 * observe_on_operation_start()/observe_on_operation_end() (for example
 * SOCK_TRAFFIC_CONTROL) and is stored both in the map key and in the
 * reported record.
 */
enum {
12+
SOCK_TRAFFIC_CONTROL = 1,
13+
XDP_SHUTDOWN = 2,
14+
ENABLE_ENCODING_METADATA = 3,
15+
};
16+
17+
/*
 * One timing record for a single operation.
 *
 *  start_time     - bpf_ktime_get_ns() value taken in observe_on_operation_start()
 *  end_time       - bpf_ktime_get_ns() value taken in observe_on_operation_end()
 *  pid_tgid       - bpf_get_current_pid_tgid() value taken in observe_on_operation_end()
 *  operation_type - operation identifier supplied by the caller
 *
 * Used as the value type of kmesh_perf_map and as the record layout written
 * to the kmesh_perf_info ring buffer by performance_report().
 */
struct operation_usage_data {
18+
__u64 start_time;
19+
__u64 end_time;
20+
__u64 pid_tgid;
21+
__u32 operation_type;
22+
};
23+
24+
/*
 * Key of kmesh_perf_map.
 *
 *  socket_cookie  - value returned by bpf_get_socket_cookie() for the context
 *  operation_type - operation identifier supplied by the caller
 *
 * observe_on_operation_start() and observe_on_operation_end() build this key
 * the same way, so a start/end pair on the same socket and operation type
 * refers to the same map entry.
 */
struct operation_usage_key {
25+
__u64 socket_cookie;
26+
__u32 operation_type;
27+
};
28+
29+
/*
 * Hash map holding measurements that have been started but not yet finished.
 * An entry is written by observe_on_operation_start() and looked up and then
 * deleted by observe_on_operation_end(). Entries are not preallocated
 * (BPF_F_NO_PREALLOC); the map holds at most 131072 entries.
 */
struct {
30+
__uint(type, BPF_MAP_TYPE_HASH);
31+
__type(key, struct operation_usage_key);
32+
__type(value, struct operation_usage_data);
33+
__uint(map_flags, BPF_F_NO_PREALLOC);
34+
__uint(max_entries, 131072);
35+
} kmesh_perf_map SEC(".maps");
36+
37+
/*
 * Ring buffer into which performance_report() submits finished
 * struct operation_usage_data records. RINGBUF_SIZE is not defined in this
 * header. NOTE(review): presumably it comes from bpf_common.h and the buffer
 * is consumed by the user-space daemon - confirm.
 */
struct {
38+
__uint(type, BPF_MAP_TYPE_RINGBUF);
39+
__uint(max_entries, RINGBUF_SIZE);
40+
} kmesh_perf_info SEC(".maps");
41+
42+
static inline void performance_report(struct operation_usage_data *data)
43+
{
44+
struct operation_usage_data *info = NULL;
45+
info = bpf_ringbuf_reserve(&kmesh_perf_info, sizeof(struct operation_usage_data), 0);
46+
if (!info) {
47+
BPF_LOG(ERR, PROBE, "bpf_ringbuf_reserve map proformance info failed\n");
48+
return;
49+
}
50+
info->start_time = data->start_time;
51+
info->end_time = data->end_time;
52+
info->operation_type = data->operation_type;
53+
info->pid_tgid = data->pid_tgid;
54+
bpf_ringbuf_submit(info, 0);
55+
}
56+
57+
/*
 * observe_on_operation_start - record the start of a measured operation.
 * @operation_type: identifier of the operation being measured
 * @kmesh_ctx:      context whose ->ctx is used to obtain the socket cookie
 *
 * When PERF_MONITOR is non-zero, stores an entry in kmesh_perf_map keyed by
 * (socket cookie, operation_type) containing the current bpf_ktime_get_ns()
 * value as start_time. An existing entry with the same key is overwritten
 * (BPF_ANY). The result of the map update is not checked.
 *
 * When PERF_MONITOR is zero this function does nothing.
 */
static inline void observe_on_operation_start(__u32 operation_type, struct kmesh_context *kmesh_ctx)
{
#if PERF_MONITOR
    struct operation_usage_key perf_key = {};
    struct operation_usage_data perf_data = {};
    struct bpf_sock_addr *sock_addr = kmesh_ctx->ctx;

    /* Zero-initialise first, then fill in the individual fields. */
    perf_key.socket_cookie = bpf_get_socket_cookie(sock_addr);
    perf_key.operation_type = operation_type;

    perf_data.start_time = bpf_ktime_get_ns();
    perf_data.operation_type = operation_type;

    bpf_map_update_elem(&kmesh_perf_map, &perf_key, &perf_data, BPF_ANY);
#endif
}
74+
75+
/*
 * observe_on_operation_end - finish a measurement and report it.
 * @operation_type: identifier of the operation being measured
 * @kmesh_ctx:      context whose ->ctx is used to obtain the socket cookie
 *
 * When PERF_MONITOR is non-zero, looks up the entry stored by
 * observe_on_operation_start() for (socket cookie, operation_type). If it is
 * found, end_time and pid_tgid are filled in and the record is handed to
 * performance_report(). The key is then deleted from kmesh_perf_map whether
 * or not the lookup succeeded.
 *
 * This is the only place that removes the entry, so a code path that calls
 * observe_on_operation_start() without reaching this function leaves its
 * entry in the map.
 *
 * When PERF_MONITOR is zero this function does nothing.
 */
static inline void observe_on_operation_end(__u32 operation_type, struct kmesh_context *kmesh_ctx)
{
#if PERF_MONITOR
    struct operation_usage_key perf_key = {};
    struct operation_usage_data *record = NULL;
    __u64 caller = bpf_get_current_pid_tgid();
    struct bpf_sock_addr *sock_addr = kmesh_ctx->ctx;

    perf_key.socket_cookie = bpf_get_socket_cookie(sock_addr);
    perf_key.operation_type = operation_type;

    record = bpf_map_lookup_elem(&kmesh_perf_map, &perf_key);
    if (record) {
        record->end_time = bpf_ktime_get_ns();
        record->pid_tgid = caller;
        performance_report(record);
    }

    bpf_map_delete_elem(&kmesh_perf_map, &perf_key);
#endif
}
97+
#endif

bpf/kmesh/probes/probe.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#define __KMESH_BPF_PROBE_H__
66

77
#include "tcp_probe.h"
8+
#include "performance_probe.h"
89

910
static inline void observe_on_pre_connect(struct bpf_sock *sk)
1011
{

bpf/kmesh/workload/cgroup_sock.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
static inline int sock_traffic_control(struct kmesh_context *kmesh_ctx)
1515
{
16+
observe_on_operation_start(SOCK_TRAFFIC_CONTROL, kmesh_ctx);
1617
int ret;
1718
frontend_value *frontend_v = NULL;
1819
frontend_key frontend_k = {0};
@@ -61,7 +62,7 @@ static inline int sock_traffic_control(struct kmesh_context *kmesh_ctx)
6162
BPF_LOG(ERR, KMESH, "frontend_manager failed, ret:%d\n", ret);
6263
return ret;
6364
}
64-
65+
observe_on_operation_end(SOCK_TRAFFIC_CONTROL, kmesh_ctx);
6566
return 0;
6667
}
6768

daemon/options/options.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ type BootstrapConfigs struct {
2929
CniConfig *cniConfig
3030
ByPassConfig *byPassConfig
3131
SecretManagerConfig *secretConfig
32+
PerfConfig *perfConfig
3233
}
3334

3435
func NewBootstrapConfigs() *BootstrapConfigs {
@@ -37,6 +38,7 @@ func NewBootstrapConfigs() *BootstrapConfigs {
3738
CniConfig: &cniConfig{},
3839
ByPassConfig: &byPassConfig{},
3940
SecretManagerConfig: &secretConfig{},
41+
PerfConfig: &perfConfig{},
4042
}
4143
}
4244

@@ -54,6 +56,7 @@ func (c *BootstrapConfigs) AttachFlags(cmd *cobra.Command) {
5456
c.CniConfig.AttachFlags(cmd)
5557
c.ByPassConfig.AttachFlags(cmd)
5658
c.SecretManagerConfig.AttachFlags(cmd)
59+
c.PerfConfig.AttachFlags(cmd)
5760
}
5861

5962
func (c *BootstrapConfigs) ParseConfigs() error {

daemon/options/perf.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Copyright The Kmesh Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at:
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package options
18+
19+
import (
20+
"github.com/spf13/cobra"
21+
)
22+
23+
// perfConfig holds the command-line options for the performance monitor.
// EnablePerfMonitor is bound to the --enable-perfmonitor flag by AttachFlags
// and defaults to false.
type perfConfig struct {
24+
EnablePerfMonitor bool
25+
}
26+
27+
func (c *perfConfig) AttachFlags(cmd *cobra.Command) {
28+
cmd.PersistentFlags().BoolVar(&c.EnablePerfMonitor, "enable-perfmonitor", false, "whether to start performance monitor or not, default to false")
29+
}

deploy/yaml/kmesh.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ spec:
7474
command: ["/bin/sh", "-c"]
7575
args:
7676
[
77-
"./start_kmesh.sh --mode=dual-engine --enable-bypass=false --enable-bpf-log=true",
77+
"./start_kmesh.sh --mode=dual-engine --enable-bypass=false --enable-bpf-log=true --enable-perfmonitor=false",
7878
]
7979
securityContext:
8080
privileged: true
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
---
2+
title: proposal of Performance Monitoring
3+
authors:
4+
- "@skwwwwww" # Authors' GitHub accounts here.
5+
reviewers:
6+
- ""
7+
- TBD
8+
approvers:
9+
- ""
10+
- TBD
11+
12+
creation-date: 2024-09-21
13+
14+
---
15+
16+
## Proposal of Kmesh performance monitoring
17+
18+
<!--
19+
This is the title of your KEP. Keep it short, simple, and descriptive. A good
20+
title can help communicate what the KEP is and should be considered as part of
21+
any review.
22+
-->
23+
### Summary
24+
25+
<!--
26+
This section is incredibly important for producing high-quality, user-focused
27+
documentation such as release notes or a development roadmap.
28+
29+
A good summary is probably at least a paragraph in length.
30+
-->
31+
32+
Performance monitoring is the foundation for keeping Kmesh efficient, scalable, and robust. Monitoring key metrics such as Kmesh daemon activity, the number of eBPF maps, the number of entries in each map, and the duration of eBPF method execution is crucial to ensure optimal system performance.
33+
34+
In this proposal, I will analyze the performance monitoring metrics of Kmesh. I will also suggest the implementation of enhanced observability features within Kmesh to capture these critical performance indicators. This will allow users to seamlessly monitor Kmesh's performance and ensure system efficiency.
35+
36+
### Proposal
37+
38+
Kmesh needs to collect metrics in the kernel and pass them to user space. In user space, the data related to eBPF maps and operation duration can then be queried through Prometheus and visualized using Grafana.
39+
40+
#### Design Details
41+
42+
##### Metrics related to operation duration
43+
44+
Kmesh needs to get metrics from the kernel and send them to user space. A BPF map is used to record the metrics and serves as the vehicle for transferring them.
45+
46+
So we need to define a bpf map that contains all the required metrics:
47+
48+
```
49+
struct operation_usage_data {
50+
__u64 start_time;
51+
__u64 end_time;
52+
__u32 operation_type;
53+
};
54+
55+
struct operation_usage_key {
56+
__u32 tid;
57+
__u32 operation_type;
58+
};
59+
60+
struct {
61+
__uint(type, BPF_MAP_TYPE_HASH);
62+
__type(key, struct operation_usage_key);
63+
__type(value, struct operation_usage_data);
64+
__uint(max_entries, 1024);
65+
} kmesh_perf_map SEC(".maps");
66+
67+
struct {
68+
__uint(type, BPF_MAP_TYPE_RINGBUF);
69+
__uint(max_entries, RINGBUF_SIZE);
70+
} kmesh_perf_info SEC(".maps");
71+
```
72+
73+
Collect the timestamps at the beginning and end of the function. At the end of the function, write the data from the map into the ring buffer.
74+
75+
##### Metrics related to eBPF maps
76+
77+
In user space, retrieve relevant information from the eBPF maps, including the total number of maps, the number of entries in each map, the maximum number of entries in the maps, and the memory space locked by the maps.
78+
79+
![](pics/kmesh_map_and_operation_monitoring.jpg)
80+
81+
##### Metrics related to Kmesh resource usage
82+
83+
Provided by cAdvisor, use the `container_memory_usage_bytes` and `container_cpu_usage_seconds_total` metrics to visualize the memory usage and CPU consumption of Kmesh itself.
84+
![](pics/kmesh_daemon_monitoring.jpg)
85+
86+
![](pics/performance_monitoring.jpg)
87+
74 KB
Loading
141 KB
Loading
33.9 KB
Loading

0 commit comments

Comments
 (0)