@@ -94,26 +94,26 @@ local utils = import 'mixin-utils/utils.libsonnet';
9494 .addRow(
9595 $.row('Replication' )
9696 .addPanel(
97- $.panel('Tenants (By Instance)' ) +
97+ $.panel('Per %s Tenants' % $._config.per_instance_label ) +
9898 $.queryPanel(
99- 'sum by(pod ) (cortex_alertmanager_tenants_owned{%s})' % $. jobMatcher('alertmanager' ),
100- '{{pod }}'
99+ 'max by(%s ) (cortex_alertmanager_tenants_owned{%s})' % [$._config.per_instance_label, $. jobMatcher('alertmanager' )] ,
100+ '{{%s }}' % $._config.per_instance_label
101101 ) +
102102 $.stack
103103 )
104104 .addPanel(
105- $.panel('Alerts (By Instance)' ) +
105+ $.panel('Per %s Alerts' % $._config.per_instance_label ) +
106106 $.queryPanel(
107- 'sum by(pod ) (cortex_alertmanager_alerts{%s})' % $. jobMatcher('alertmanager' ),
108- '{{pod }}'
107+ 'sum by(%s ) (cortex_alertmanager_alerts{%s})' % [$._config.per_instance_label, $. jobMatcher('alertmanager' )] ,
108+ '{{%s }}' % $._config.per_instance_label
109109 ) +
110110 $.stack
111111 )
112112 .addPanel(
113- $.panel('Silences (By Instance)' ) +
113+ $.panel('Per %s Silences' % $._config.per_instance_label ) +
114114 $.queryPanel(
115- 'sum by(pod ) (cortex_alertmanager_silences{%s})' % $. jobMatcher('alertmanager' ),
116- '{{pod }}'
115+ 'sum by(%s ) (cortex_alertmanager_silences{%s})' % [$._config.per_instance_label, $. jobMatcher('alertmanager' )] ,
116+ '{{%s }}' % $._config.per_instance_label
117117 ) +
118118 $.stack
119119 )
@@ -150,37 +150,20 @@ local utils = import 'mixin-utils/utils.libsonnet';
150150 )
151151 )
152152 .addRow(
153- $.row('Sharding Initial State Sync' )
153+ $.row('Sharding Runtime State Sync' )
154154 .addPanel(
155155 $.panel('Syncs/sec' ) +
156- $.queryPanel(
157- [
158- |||
159- sum(rate(cortex_alertmanager_state_initial_sync_total{%s}[$__rate_interval]))
160- -
161- sum(rate(cortex_alertmanager_state_initial_sync_completed_total{outcome="failed",%s}[$__rate_interval]))
162- ||| % [$.jobMatcher('alertmanager' ), $.jobMatcher('alertmanager' )],
163- 'sum(rate(cortex_alertmanager_state_initial_sync_completed_total{outcome="failed",%s}[$__rate_interval]))' % $.jobMatcher('alertmanager' ),
164- ],
165- ['success' , 'failed' ]
166- )
167- )
168- .addPanel(
169- $.panel('Syncs/sec (By Outcome)' ) +
170156 $.queryPanel(
171157 'sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{%s}[$__rate_interval]))' % $.jobMatcher('alertmanager' ),
172158 '{{outcome}}'
173159 )
174160 )
175161 .addPanel(
176- $.panel('Duration ' ) +
177- utils.latencyRecordingRulePanel ('cortex_alertmanager_state_initial_sync_duration_seconds' , $.jobSelector ('alertmanager' ))
162+ $.panel('Sync duration ' ) +
163+ $.latencyPanel ('cortex_alertmanager_state_initial_sync_duration_seconds' , '{%s}' % $.jobMatcher ('alertmanager' ))
178164 )
179- )
180- .addRow(
181- $.row('Sharding State Operations' )
182165 .addPanel(
183- $.panel('Replica Fetches /sec' ) +
166+ $.panel('Fetch state from other alertmanagers /sec' ) +
184167 $.queryPanel(
185168 [
186169 |||
@@ -193,8 +176,11 @@ local utils = import 'mixin-utils/utils.libsonnet';
193176 ['success' , 'failed' ]
194177 )
195178 )
179+ )
180+ .addRow(
181+ $.row('Sharding State Operations' )
196182 .addPanel(
197- $.panel('Replica Updates /sec' ) +
183+ $.panel('Replicate state to other alertmanagers /sec' ) +
198184 $.queryPanel(
199185 [
200186 |||
@@ -208,7 +194,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
208194 )
209195 )
210196 .addPanel(
211- $.panel('Partial Merges /sec' ) +
197+ $.panel('Merge state from other alertmanagers /sec' ) +
212198 $.queryPanel(
213199 [
214200 |||
@@ -222,7 +208,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
222208 )
223209 )
224210 .addPanel(
225- $.panel('Remote Storage Persists /sec' ) +
211+ $.panel('Persist state to remote storage /sec' ) +
226212 $.queryPanel(
227213 [
228214 |||
0 commit comments