Skip to content

Commit 8fba58f

Browse files
schmikei and Dasomeone authored
Apache Mesos Mixin Modernization (#1488)
* update apache mesos mixin to use modern libraries
* touch up based off testing
* forgot to commit dashboards_out
* remove percent in name
* Apply suggestions from code review
  Co-authored-by: Emily <1282515+Dasomeone@users.noreply.github.com>
* commonlib components
* switch over to commonlib
* fix formatting
* rely on commonlib stylings a little bit more

---------

Co-authored-by: Emily <1282515+Dasomeone@users.noreply.github.com>
1 parent d7c1c77 commit 8fba58f

16 files changed

+1418
-2477
lines changed

apache-mesos-mixin/alerts/alerts.libsonnet renamed to apache-mesos-mixin/alerts.libsonnet

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
prometheusAlerts+:: {
2+
new(this): {
33
groups+: [
44
{
55
name: 'apache-mesos',
@@ -8,7 +8,7 @@
88
alert: 'ApacheMesosHighMemoryUsage',
99
expr: |||
1010
min without(instance, job, type) (mesos_master_mem{type="percent"}) > %(alertsWarningMemoryUsage)s
11-
||| % $._config,
11+
||| % this.config,
1212
'for': '5m',
1313
labels: {
1414
severity: 'warning',
@@ -19,14 +19,14 @@
1919
(
2020
'{{ printf "%%.0f" $value }} percent memory usage on {{$labels.mesos_cluster}}, ' +
2121
'which is above the threshold of %(alertsWarningMemoryUsage)s.'
22-
) % $._config,
22+
) % this.config,
2323
},
2424
},
2525
{
2626
alert: 'ApacheMesosHighDiskUsage',
2727
expr: |||
2828
min without(instance, job, type) (mesos_master_disk{type="percent"}) > %(alertsCriticalDiskUsage)s
29-
||| % $._config,
29+
||| % this.config,
3030
'for': '5m',
3131
labels: {
3232
severity: 'critical',
@@ -37,14 +37,14 @@
3737
(
3838
'{{ printf "%%.0f" $value }} percent disk usage on {{$labels.mesos_cluster}}, ' +
3939
'which is above the threshold of %(alertsCriticalDiskUsage)s.'
40-
) % $._config,
40+
) % this.config,
4141
},
4242
},
4343
{
4444
alert: 'ApacheMesosUnreachableTasks',
4545
expr: |||
4646
max without(instance, job, state) (mesos_master_task_states_current{state="unreachable"}) > %(alertsWarningUnreachableTask)s
47-
||| % $._config,
47+
||| % this.config,
4848
'for': '5m',
4949
labels: {
5050
severity: 'warning',
@@ -55,14 +55,14 @@
5555
(
5656
'{{ printf "%%.0f" $value }} unreachable tasks on {{$labels.mesos_cluster}}, ' +
5757
'which is above the threshold of %(alertsWarningUnreachableTask)s.'
58-
) % $._config,
58+
) % this.config,
5959
},
6060
},
6161
{
6262
alert: 'ApacheMesosNoLeaderElected',
6363
expr: |||
6464
max without(instance, job) (mesos_master_elected) == 0
65-
||| % $._config,
65+
||| % this.config,
6666
'for': '1m',
6767
labels: {
6868
severity: 'critical',
@@ -72,14 +72,14 @@
7272
description:
7373
(
7474
'There is no cluster coordinator on {{$labels.mesos_cluster}}.'
75-
) % $._config,
75+
) % this.config,
7676
},
7777
},
7878
{
7979
alert: 'ApacheMesosInactiveAgents',
8080
expr: |||
8181
max without(instance, job, state) (mesos_master_slaves_state{state=~"connected_inactive|disconnected_inactive"}) > 1
82-
||| % $._config,
82+
||| % this.config,
8383
'for': '5m',
8484
labels: {
8585
severity: 'warning',
@@ -89,7 +89,7 @@
8989
description:
9090
(
9191
'{{ printf "%%.0f" $value }} inactive agent clients over the last 5m which is above the threshold of 1.'
92-
) % $._config,
92+
) % this.config,
9393
},
9494
},
9595
],
Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,36 @@
11
{
2-
_config+:: {
3-
dashboardTags: ['apache-mesos-mixin'],
4-
dashboardPeriod: 'now-1h',
5-
dashboardTimezone: 'default',
6-
dashboardRefresh: '1m',
72

8-
// alerts thresholds
9-
alertsWarningMemoryUsage: 90,
10-
alertsCriticalDiskUsage: 90,
11-
alertsWarningUnreachableTask: 3,
12-
enableLokiLogs: true,
13-
enableMultiCluster: false,
14-
mesosSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"',
15-
multiclusterSelector: 'job=~"$job"',
3+
local this = self,
4+
filteringSelector: 'job="integrations/apache-mesos"',
5+
groupLabels: ['job', 'mesos_cluster', 'cluster'],
6+
instanceLabels: ['instance'],
7+
8+
dashboardTags: [self.uid + '-mixin'],
9+
uid: 'apache-mesos',
10+
dashboardNamePrefix: 'Apache Mesos',
11+
dashboardPeriod: 'now-1h',
12+
dashboardTimezone: 'default',
13+
dashboardRefresh: '1m',
14+
15+
// Logging configuration
16+
enableLokiLogs: true,
17+
logLabels: ['job', 'cluster', 'instance'],
18+
extraLogLabels: ['level'], // Required by logs-lib
19+
logsVolumeGroupBy: 'level',
20+
showLogsVolume: true,
21+
22+
// alerts thresholds
23+
alertsWarningMemoryUsage: 90,
24+
alertsCriticalDiskUsage: 90,
25+
alertsWarningUnreachableTask: 3,
26+
27+
// metrics source for signals library
28+
metricsSource: 'prometheus',
29+
30+
// signals configuration
31+
signals+: {
32+
overview: (import './signals/overview.libsonnet')(this),
33+
master: (import './signals/master.libsonnet')(this),
34+
agent: (import './signals/agent.libsonnet')(this),
1635
},
1736
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
local g = import './g.libsonnet';
2+
local logslib = import 'logs-lib/logs/main.libsonnet';
3+
4+
{
5+
local root = self,
6+
// Builds the dashboards object for the mixin.
// Always produces 'apache-mesos-overview.json'; additionally produces
// 'apache-mesos-logs.json' when this.config.enableLokiLogs is true.
// Reads links, variables, annotations and rows from this.grafana, and tags,
// uid, name prefix, refresh, period and timezone from this.config.
// Note: `prefix` is used for the logs dashboard title, while the overview
// title reads this.config.dashboardNamePrefix directly (same value).
new(this)::
7+
local links = this.grafana.links;
8+
local tags = this.config.dashboardTags;
9+
local uid = g.util.string.slugify(this.config.uid);
10+
local vars = this.grafana.variables;
11+
local annotations = this.grafana.annotations;
12+
local prefix = this.config.dashboardNamePrefix;
13+
local refresh = this.config.dashboardRefresh;
14+
local period = this.config.dashboardPeriod;
15+
local timezone = this.config.dashboardTimezone;
16+
17+
18+
{
19+
20+
// Overview dashboard: the master and agent overview rows wrapped into a grid,
// followed by the shared settings from applyCommon.
'apache-mesos-overview.json':
21+
g.dashboard.new(this.config.dashboardNamePrefix + ' overview')
22+
+ g.dashboard.withPanels(
23+
g.util.panel.resolveCollapsedFlagOnRows(
24+
g.util.grid.wrapPanels([
25+
this.grafana.rows.masterOverview,
26+
this.grafana.rows.agentOverview,
27+
])
28+
)
29+
) + root.applyCommon(
30+
vars.multiInstance,
31+
uid + '_overview',
32+
tags,
33+
// `+::` hides this field, and applyCommon passes std.objectValues(links),
// which skips hidden fields — presumably so the overview dashboard does
// not link to itself; confirm against this.grafana.links.
links { apacheMesosOverview+:: {} },
34+
annotations,
35+
timezone,
36+
refresh,
37+
period,
38+
),
39+
}
40+
// The logs dashboard is merged in only when enableLokiLogs is set;
// otherwise an empty object is added and the result is unchanged.
+ if this.config.enableLokiLogs then {
41+
'apache-mesos-logs.json':
42+
logslib.new(
43+
prefix + ' logs',
44+
datasourceName=this.grafana.variables.datasources.loki.name,
45+
datasourceRegex=this.grafana.variables.datasources.loki.regex,
46+
filterSelector=this.config.filteringSelector,
47+
// NOTE(review): labels come from groupLabels + extraLogLabels; the config
// shown in this commit also defines logLabels, which is not read here —
// confirm which label set is intended.
labels=this.config.groupLabels + this.config.extraLogLabels,
48+
formatParser=null,
49+
showLogsVolume=this.config.showLogsVolume,
50+
) {
51+
dashboards+: {
52+
logs+:
53+
root.applyCommon(super.logs.templating.list, uid=uid + '-logs', tags=tags, links=links { logs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period),
54+
},
55+
panels+: {
56+
logs+:
57+
g.panel.logs.options.withEnableLogDetails(true)
58+
+ g.panel.logs.options.withShowTime(false)
59+
+ g.panel.logs.options.withWrapLogMessage(false),
60+
},
61+
variables+: {
62+
toArray+: [
63+
this.grafana.variables.datasources.prometheus { hide: 2 },
64+
],
65+
},
66+
// Only the logs dashboard is taken from the customised logslib object.
}.dashboards.logs,
67+
} else {},
68+
69+
// Returns the dashboard settings shared by the dashboards built in new():
// tags, uid, links (the values of the `links` object), timezone, refresh,
// time-range start (`period`) and template variables (`vars`).
// NOTE(review): `annotations` is accepted as a parameter but is never
// referenced in this body, so annotations passed by callers are not applied —
// confirm whether an annotations step was intended here.
applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period):
70+
g.dashboard.withTags(tags)
71+
+ g.dashboard.withUid(uid)
72+
+ g.dashboard.withLinks(std.objectValues(links))
73+
+ g.dashboard.withTimezone(timezone)
74+
+ g.dashboard.withRefresh(refresh)
75+
+ g.dashboard.time.withFrom(period)
76+
+ g.dashboard.withVariables(vars),
77+
}

0 commit comments

Comments
 (0)