Skip to content

Commit 036e7e7

Browse files
schmikei and Dasomeone authored
Modernize the apache tomcat mixin (#1493)
* modernize the apache tomcat mixin
* ensure style guide is followed
* stylistic updates
* make fmt
* make the dashboard a tad less cookie cutter
* forgot to commit latest
* add to the table
* Apply suggestions from code review

Co-authored-by: Emily <1282515+Dasomeone@users.noreply.github.com>

* remove cassandra_cluster label in favor of cluster label

---------

Co-authored-by: Emily <1282515+Dasomeone@users.noreply.github.com>
1 parent cbd6e16 commit 036e7e7

19 files changed

+2735
-2374
lines changed

apache-tomcat-mixin/alerts/alerts.libsonnet renamed to apache-tomcat-mixin/alerts.libsonnet

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
{
2-
prometheusAlerts+:: {
2+
new(this): {
33
groups+: [
44
{
55
name: 'ApacheTomcatAlerts',
66
rules: [
77
{
88
alert: 'ApacheTomcatAlertsHighCpuUsage',
99
expr: |||
10-
sum by (%(agg)s) (jvm_process_cpu_load{%(filteringSelector)s}) > %(ApacheTomcatAlertsCriticalCpuUsage)s
11-
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
10+
sum by (%(agg)s) (jvm_process_cpu_load{%(filteringSelector)s}) > %(alertsCriticalCpuUsage)s
11+
||| % this.config { agg: std.join(',', this.config.groupLabels + this.config.instanceLabels) },
1212
'for': '5m',
1313
labels: {
1414
severity: 'critical',
@@ -18,15 +18,15 @@
1818
description:
1919
(
2020
'The CPU usage has been at {{ printf "%%.0f" $value }} percent over the last 5 minutes on {{$labels.instance}}, ' +
21-
'which is above the threshold of %(ApacheTomcatAlertsCriticalCpuUsage)s percent.'
22-
) % $._config,
21+
'which is above the threshold of %(alertsCriticalCpuUsage)s percent.'
22+
) % this.config,
2323
},
2424
},
2525
{
2626
alert: 'ApacheTomcatAlertsHighMemoryUsage',
2727
expr: |||
28-
sum(jvm_memory_usage_used_bytes{%(filteringSelector)s}) by (%(agg)s) / sum(jvm_physical_memory_bytes{%(filteringSelector)s}) by (%(agg)s) * 100 > %(ApacheTomcatAlertsCriticalMemoryUsage)s
29-
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
28+
sum(jvm_memory_usage_used_bytes{%(filteringSelector)s}) by (%(agg)s) / sum(jvm_physical_memory_bytes{%(filteringSelector)s}) by (%(agg)s) * 100 > %(alertsCriticalMemoryUsage)s
29+
||| % this.config { agg: std.join(',', this.config.groupLabels + this.config.instanceLabels) },
3030
'for': '5m',
3131
labels: {
3232
severity: 'critical',
@@ -36,15 +36,15 @@
3636
description:
3737
(
3838
'The memory usage has been at {{ printf "%%.0f" $value }} percent over the last 5 minutes on {{$labels.instance}}, ' +
39-
'which is above the threshold of %(ApacheTomcatAlertsCriticalMemoryUsage)s percent.'
40-
) % $._config,
39+
'which is above the threshold of %(alertsCriticalMemoryUsage)s percent.'
40+
) % this.config,
4141
},
4242
},
4343
{
44-
alert: 'ApacheTomcatAlertsHighRequestErrorPercent',
44+
alert: 'ApacheTomcatAlertsRequestErrors',
4545
expr: |||
46-
sum by (%(agg)s) (increase(tomcat_errorcount_total{%(filteringSelector)s}[5m]) / increase(tomcat_requestcount_total{%(filteringSelector)s}[5m]) * 100) > %(ApacheTomcatAlertsCriticalRequestErrorPercentage)s
47-
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
46+
sum by (%(agg)s) (increase(tomcat_errorcount_total{%(filteringSelector)s}[5m]) / increase(tomcat_requestcount_total{%(filteringSelector)s}[5m]) * 100) > %(alertsCriticalRequestErrorPercentage)s
47+
||| % this.config { agg: std.join(',', this.config.groupLabels + this.config.instanceLabels) },
4848
'for': '5m',
4949
labels: {
5050
severity: 'critical',
@@ -54,15 +54,15 @@
5454
description:
5555
(
5656
'The percentage of request errors has been at {{ printf "%%.0f" $value }} percent over the last 5 minutes on {{$labels.instance}}, ' +
57-
'which is above the threshold of %(ApacheTomcatAlertsCriticalRequestErrorPercentage)s percent.'
58-
) % $._config,
57+
'which is above the threshold of %(alertsCriticalRequestErrorPercentage)s percent.'
58+
) % this.config,
5959
},
6060
},
6161
{
62-
alert: 'ApacheTomcatAlertsModeratelyHighProcessingTime',
62+
alert: 'ApacheTomcatAlertsHighProcessingTime',
6363
expr: |||
64-
sum by (%(agg)s) (increase(tomcat_processingtime_total{%(filteringSelector)s}[5m]) / increase(tomcat_requestcount_total{%(filteringSelector)s}[5m])) > %(ApacheTomcatAlertsWarningProcessingTime)s
65-
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
64+
sum by (%(agg)s) (increase(tomcat_processingtime_total{%(filteringSelector)s}[5m]) / increase(tomcat_requestcount_total{%(filteringSelector)s}[5m])) > %(alertsWarningProcessingTime)s
65+
||| % this.config { agg: std.join(',', this.config.groupLabels + this.config.instanceLabels) },
6666
'for': '5m',
6767
labels: {
6868
severity: 'warning',
@@ -72,8 +72,8 @@
7272
description:
7373
(
7474
'The processing time has been at {{ printf "%%.0f" $value }}ms over the last 5 minutes on {{$labels.instance}}, ' +
75-
'which is above the threshold of %(ApacheTomcatAlertsWarningProcessingTime)sms.'
76-
) % $._config,
75+
'which is above the threshold of %(alertsWarningProcessingTime)sms.'
76+
) % this.config,
7777
},
7878
},
7979
],
Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,33 @@
11
{
2-
_config+:: {
3-
dashboardTags: ['apache-tomcat-mixin'],
4-
dashboardPeriod: 'now-1h',
5-
dashboardTimezone: 'default',
6-
dashboardRefresh: '1m',
2+
local this = self,
3+
filteringSelector: 'job="integrations/tomcat"',
4+
groupLabels: ['job', 'cluster'],
5+
logLabels: [],
6+
instanceLabels: ['instance'],
77

8-
//alert thresholds
9-
ApacheTomcatAlertsCriticalCpuUsage: 80, //%
10-
ApacheTomcatAlertsCriticalMemoryUsage: 80, //%
11-
ApacheTomcatAlertsCriticalRequestErrorPercentage: 5, //%
12-
ApacheTomcatAlertsWarningProcessingTime: 300, //ms
8+
uid: 'apache-tomcat',
9+
dashboardTags: [self.uid + '-mixin'],
10+
dashboardNamePrefix: 'Apache Tomcat',
11+
dashboardPeriod: 'now-1h',
12+
dashboardTimezone: 'default',
13+
dashboardRefresh: '1m',
14+
metricsSource: ['prometheus'], // metrics source for signals
1315

14-
// used in alerts:
15-
filteringSelector: 'job="integrations/tomcat"',
16-
groupLabels: if self.enableMultiCluster then ['job', 'cluster'] else ['job'],
17-
instanceLabels: ['instance'],
1816

19-
enableLokiLogs: true,
20-
enableMultiCluster: false,
21-
multiclusterSelector: 'job=~"$job"',
22-
tomcatSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"',
17+
// Logging configuration
18+
enableLokiLogs: true,
19+
extraLogLabels: ['level'], // Required by logs-lib
20+
logsVolumeGroupBy: 'level',
21+
showLogsVolume: true,
22+
23+
// alert thresholds
24+
alertsCriticalCpuUsage: 80, //%
25+
alertsCriticalMemoryUsage: 80, //%
26+
alertsCriticalRequestErrorPercentage: 5, //%
27+
alertsWarningProcessingTime: 300, //ms
28+
29+
signals+: {
30+
overview: (import './signals/overview.libsonnet')(this),
31+
hosts: (import './signals/hosts.libsonnet')(this),
2332
},
2433
}
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
local g = import './g.libsonnet';
2+
local commonlib = import 'common-lib/common/main.libsonnet';
3+
local logslib = import 'logs-lib/logs/main.libsonnet';
4+
5+
{
6+
local root = self,
7+
new(this)::
8+
local prefix = this.config.dashboardNamePrefix;
9+
local links = this.grafana.links;
10+
local tags = this.config.dashboardTags;
11+
local uid = g.util.string.slugify(this.config.uid);
12+
local vars = this.grafana.variables;
13+
local annotations = this.grafana.annotations;
14+
local refresh = this.config.dashboardRefresh;
15+
local period = this.config.dashboardPeriod;
16+
local timezone = this.config.dashboardTimezone;
17+
{
18+
'apache-tomcat-overview.json':
19+
g.dashboard.new(prefix + ' overview')
20+
+ g.dashboard.withPanels(
21+
g.util.panel.resolveCollapsedFlagOnRows(
22+
g.util.grid.wrapPanels(
23+
[
24+
this.grafana.rows.overview,
25+
],
26+
),
27+
),
28+
) + root.applyCommon(
29+
vars.multiInstance + [
30+
g.dashboard.variable.query.new(
31+
'protocol',
32+
query='label_values(tomcat_bytesreceived_total{%(queriesSelector)s}, protocol)' % vars
33+
)
34+
+ g.dashboard.variable.custom.selectionOptions.withMulti(true)
35+
+ g.dashboard.variable.query.queryTypes.withLabelValues(label='protocol', metric='tomcat_bytesreceived_total{%(queriesSelector)s}' % vars)
36+
+ g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus),
37+
38+
g.dashboard.variable.query.new(
39+
'port',
40+
query='label_values(tomcat_bytesreceived_total{%(queriesSelector)s}, port)' % vars
41+
)
42+
+ g.dashboard.variable.custom.selectionOptions.withMulti(true)
43+
+ g.dashboard.variable.query.queryTypes.withLabelValues(label='port', metric='tomcat_bytesreceived_total{%(queriesSelector)s}' % vars)
44+
+ g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus),
45+
],
46+
uid + '_overview',
47+
tags,
48+
links { apacheTomcatOverview:: {} },
49+
annotations,
50+
timezone,
51+
refresh,
52+
period
53+
),
54+
55+
'apache-tomcat-hosts.json':
56+
g.dashboard.new(prefix + ' hosts')
57+
+ g.dashboard.withPanels(
58+
g.util.panel.resolveCollapsedFlagOnRows(
59+
g.util.grid.wrapPanels(
60+
[
61+
this.grafana.rows.hosts,
62+
this.grafana.rows.hostServlets,
63+
],
64+
),
65+
),
66+
) + root.applyCommon(
67+
vars.multiInstance + [
68+
g.dashboard.variable.query.new('host')
69+
+ g.dashboard.variable.custom.selectionOptions.withMulti(true)
70+
+ g.dashboard.variable.query.queryTypes.withLabelValues(label='host', metric='tomcat_session_sessioncounter_total{%(queriesSelector)s}' % vars)
71+
+ g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus),
72+
73+
g.dashboard.variable.query.new('context')
74+
+ g.dashboard.variable.custom.selectionOptions.withMulti(true)
75+
+ g.dashboard.variable.query.queryTypes.withLabelValues(label='context', metric='tomcat_session_sessioncounter_total{%(queriesSelector)s}' % vars)
76+
+ g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus),
77+
78+
g.dashboard.variable.query.new('servlet')
79+
+ g.dashboard.variable.custom.selectionOptions.withMulti(true)
80+
+ g.dashboard.variable.query.queryTypes.withLabelValues(label='servlet', metric='tomcat_servlet_requestcount_total{%(queriesSelector)s}' % vars)
81+
+ g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus),
82+
],
83+
uid + '_hosts',
84+
tags,
85+
links { apacheTomcatHosts:: {} },
86+
annotations,
87+
timezone,
88+
refresh,
89+
period
90+
),
91+
} + if this.config.enableLokiLogs then {
92+
'apache-tomcat-logs.json':
93+
logslib.new(
94+
prefix + ' logs',
95+
datasourceName=this.grafana.variables.datasources.loki.name,
96+
datasourceRegex=this.grafana.variables.datasources.loki.regex,
97+
filterSelector=this.config.filteringSelector,
98+
labels=this.config.groupLabels + this.config.extraLogLabels,
99+
formatParser=null,
100+
showLogsVolume=this.config.showLogsVolume,
101+
)
102+
{
103+
dashboards+:
104+
{
105+
logs+:
106+
root.applyCommon(super.logs.templating.list, uid=uid + '-logs', tags=tags, links=links { apacheTomcatLogs:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period),
107+
},
108+
panels+:
109+
{
110+
logs+:
111+
g.panel.logs.options.withEnableLogDetails(true)
112+
+ g.panel.logs.options.withShowTime(false)
113+
+ g.panel.logs.options.withWrapLogMessage(false),
114+
},
115+
variables+: {
116+
toArray+: [
117+
this.grafana.variables.datasources.prometheus { hide: 2 },
118+
],
119+
},
120+
}.dashboards.logs,
121+
},
122+
123+
applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period):
124+
g.dashboard.withTags(tags)
125+
+ g.dashboard.withUid(uid)
126+
+ g.dashboard.withLinks(std.objectValues(links))
127+
+ g.dashboard.withTimezone(timezone)
128+
+ g.dashboard.withRefresh(refresh)
129+
+ g.dashboard.time.withFrom(period)
130+
+ g.dashboard.withVariables(vars)
131+
+ g.dashboard.withAnnotations(std.objectValues(annotations)),
132+
}

0 commit comments

Comments
 (0)