Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 8c82746

Browse files
authored
Merge pull request #319 from grafana/darrenjaneczek/config-job-aggregation
refactor: config for job aggregation strings
2 parents e7cbfe4 + a03451c commit 8c82746

File tree

9 files changed

+77
-21
lines changed

9 files changed

+77
-21
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
* [CHANGE] Store-gateway: increased `-blocks-storage.bucket-store.max-chunk-pool-bytes` from 2GB (default) to 12GB. #322
1111
* [ENHANCEMENT] cortex-mixin: Make `cluster_namespace_deployment:kube_pod_container_resource_requests_{cpu_cores,memory_bytes}:sum` backwards compatible with `kube-state-metrics` v2.0.0. #317
1212
* [BUGFIX] Fixed `CortexIngesterHasNotShippedBlocks` alert false positive in case an ingester instance had ingested samples in the past, then no traffic was received for a long period and then it started receiving samples again. #308
13+
* [CHANGE] Dashboards: added overridable `job_labels` and `cluster_labels` to the configuration object as label lists to uniquely identify jobs and clusters in the metric names and group-by lists in dashboards. #319
14+
* [CHANGE] Dashboards: `alert_aggregation_labels` has been removed from the configuration, and overriding this value is deprecated. Instead, the labels are now derived from the `cluster_labels` list and should be overridden through that list. #319
1315

1416
## 1.9.0 / 2021-05-18
1517

cortex-mixin/alerts.libsonnet

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@
88
(import 'alerts/compactor.libsonnet')
99
else {}) +
1010

11-
{ _config:: $._config },
11+
{ _config:: $._config + $._group_config },
1212
}

cortex-mixin/alerts/alerts.libsonnet

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
{
4040
alert: 'CortexRequestLatency',
4141
expr: |||
42-
cluster_namespace_job_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|ready|/schedulerpb.SchedulerForFrontend/FrontendLoop|/schedulerpb.SchedulerForQuerier/QuerierLoop"}
42+
%(group_prefix_jobs)s_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process|ready|/schedulerpb.SchedulerForFrontend/FrontendLoop|/schedulerpb.SchedulerForQuerier/QuerierLoop"}
4343
>
4444
%(cortex_p99_latency_threshold_seconds)s
4545
||| % $._config,

cortex-mixin/config.libsonnet

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,10 @@
3838
compactor: 'compactor.*', // Match also custom compactor deployments.
3939
},
4040

41-
// Labels used to in alert aggregations - should uniquely identify
42-
// a single Cortex cluster.
43-
alert_aggregation_labels: 'cluster, namespace',
41+
// Grouping labels, to uniquely identify and group by {jobs, clusters}
42+
job_labels: ['cluster', 'namespace', 'job'],
43+
cluster_labels: ['cluster', 'namespace'],
44+
4445
cortex_p99_latency_threshold_seconds: 2.5,
4546

4647
// Whether resources dashboards are enabled (based on cAdvisor metrics).

cortex-mixin/dashboards.libsonnet

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,5 @@
3131
(import 'dashboards/writes-resources.libsonnet') +
3232
(import 'dashboards/alertmanager-resources.libsonnet')) +
3333

34-
{ _config:: $._config },
34+
{ _config:: $._config + $._group_config },
3535
}

cortex-mixin/dashboards/writes.libsonnet

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,22 @@ local utils = import 'mixin-utils/utils.libsonnet';
1212
})
1313
.addPanel(
1414
$.panel('Samples / s') +
15-
$.statPanel('sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{%s})' % $.jobMatcher($._config.job_names.distributor), format='reqps')
15+
$.statPanel(
16+
'sum(%(group_prefix_jobs)s:cortex_distributor_received_samples:rate5m{%(job)s})' % (
17+
$._config {
18+
job: $.jobMatcher($._config.job_names.distributor),
19+
}
20+
),
21+
format='reqps'
22+
)
1623
)
1724
.addPanel(
1825
$.panel('Active Series') +
1926
$.statPanel(|||
2027
sum(cortex_ingester_memory_series{%(ingester)s}
21-
/ on(namespace) group_left
22-
max by (namespace) (cortex_distributor_replication_factor{%(distributor)s}))
23-
||| % {
28+
/ on(%(group_by_cluster)s) group_left
29+
max by (%(group_by_cluster)s) (cortex_distributor_replication_factor{%(distributor)s}))
30+
||| % ($._config) {
2431
ingester: $.jobMatcher($._config.job_names.ingester),
2532
distributor: $.jobMatcher($._config.job_names.distributor),
2633
}, format='short')

cortex-mixin/groups.libsonnet

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
local makePrefix(groups) = std.join('_', groups),
3+
local makeGroupBy(groups) = std.join(', ', groups),
4+
5+
local group_by_cluster = makeGroupBy($._config.cluster_labels),
6+
7+
_group_config+:: {
8+
// Each group prefix is composed of `_`-separated labels
9+
group_prefix_jobs: makePrefix($._config.job_labels),
10+
group_prefix_clusters: makePrefix($._config.cluster_labels),
11+
12+
// Each group-by label list is `, `-separated and uniquely identifies a job or a cluster
13+
group_by_job: makeGroupBy($._config.job_labels),
14+
group_by_cluster: group_by_cluster,
15+
},
16+
17+
// The following works around the deprecation of `$._config.alert_aggregation_labels`
18+
// - If an override of that value is detected, a warning will be printed
19+
// - If no override was detected, it will be set to the `group_by_cluster` value,
20+
// which will replace it altogether in the future.
21+
local alert_aggregation_labels_override = (
22+
{
23+
alert_aggregation_labels: null,
24+
} + super._config
25+
).alert_aggregation_labels,
26+
27+
_config+:: {
28+
alert_aggregation_labels:
29+
if alert_aggregation_labels_override != null
30+
then std.trace(
31+
|||
32+
Deprecated: _config.alert_aggregation_labels
33+
This field has been explicitly overridden to "%s".
34+
Instead, express the override in terms of _config.cluster_labels.
35+
E.g., cluster_labels: %s will automatically convert to "%s".
36+
||| % [
37+
alert_aggregation_labels_override,
38+
$._config.cluster_labels,
39+
group_by_cluster,
40+
],
41+
alert_aggregation_labels_override
42+
)
43+
else group_by_cluster,
44+
},
45+
}

cortex-mixin/mixin.libsonnet

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
(import 'config.libsonnet') +
2+
(import 'groups.libsonnet') +
23
(import 'dashboards.libsonnet') +
34
(import 'alerts.libsonnet') +
45
(import 'recording_rules.libsonnet')

cortex-mixin/recording_rules.libsonnet

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
local utils = import 'mixin-utils/utils.libsonnet';
22

33
{
4+
local _config = {
5+
max_series_per_ingester: 1.5e6,
6+
max_samples_per_sec_per_ingester: 80e3,
7+
max_samples_per_sec_per_distributor: 240e3,
8+
limit_utilisation_target: 0.6,
9+
} + $._config + $._group_config,
410
prometheusRules+:: {
511
groups+: [
612
{
@@ -51,20 +57,14 @@ local utils = import 'mixin-utils/utils.libsonnet';
5157
name: 'cortex_received_samples',
5258
rules: [
5359
{
54-
record: 'cluster_namespace_job:cortex_distributor_received_samples:rate5m',
60+
record: '%(group_prefix_jobs)s:cortex_distributor_received_samples:rate5m' % _config,
5561
expr: |||
56-
sum by (cluster, namespace, job) (rate(cortex_distributor_received_samples_total[5m]))
57-
|||,
62+
sum by (%(group_by_job)s) (rate(cortex_distributor_received_samples_total[5m]))
63+
||| % _config,
5864
},
5965
],
6066
},
6167
{
62-
local _config = {
63-
max_series_per_ingester: 1.5e6,
64-
max_samples_per_sec_per_ingester: 80e3,
65-
max_samples_per_sec_per_distributor: 240e3,
66-
limit_utilisation_target: 0.6,
67-
},
6868
name: 'cortex_scaling_rules',
6969
rules: [
7070
{
@@ -89,7 +89,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
8989
ceil(
9090
quantile_over_time(0.99,
9191
sum by (cluster, namespace) (
92-
cluster_namespace_job:cortex_distributor_received_samples:rate5m
92+
%(group_prefix_jobs)s:cortex_distributor_received_samples:rate5m
9393
)[24h:]
9494
)
9595
/ %(max_samples_per_sec_per_distributor)s
@@ -123,7 +123,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
123123
ceil(
124124
quantile_over_time(0.99,
125125
sum by (cluster, namespace) (
126-
cluster_namespace_job:cortex_distributor_received_samples:rate5m
126+
%(group_prefix_jobs)s:cortex_distributor_received_samples:rate5m
127127
)[24h:]
128128
)
129129
* 3 / %(max_samples_per_sec_per_ingester)s

0 commit comments

Comments
 (0)