Skip to content

Commit 171b8bb

Browse files
committed
imp: fix spongix dash and add spongix alert
1 parent e89d106 commit 171b8bb

File tree

4 files changed

+74
-74
lines changed

4 files changed

+74
-74
lines changed

flake.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nix/cloud/alerts.nix

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,23 @@
11
{
22
inputs,
33
cell,
4-
}:
5-
{
6-
ci-world-alert-group-1 = {
4+
}: {
5+
ci-world-spongix = {
76
datasource = "vm";
8-
# concurrency = 1; # Can override top level alert group details if needed, ex: concurrency default = 1
9-
# interval = "30s"; # Default = 1m
107
rules = [
118
{
12-
alert = "ci-world-custom-vm-alert-1";
9+
alert = "SpongixRemoteCacheFailure";
10+
expr = ''increase(prometheus_spongix_remote_cache_fail[1h]) > 0'';
11+
for = "2m";
12+
labels.severity = "critical";
1313
annotations = {
14-
description =
15-
"{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes.";
16-
summary =
17-
"Service {{ $labels.job }} is down on {{ $labels.instance }}";
14+
description = "Spongix service on {{ $labels.hostname }} has had {{ $value }} remote cache failure(s) in the past 1 hour.";
15+
summary = "Spongix service on {{ $labels.hostname }} had a remote cache failure";
1816
};
19-
expr = ''up{job=~"fakeJob|anotherFakeJob"} == 0'';
20-
for = "2m";
21-
labels = { severity = "critical"; };
2217
}
2318
];
2419
};
2520

2621
# inherit (inputs.bitte-cells.bitte.alerts)
2722
# ;
2823
}
29-

nix/cloud/dashboards/spongix.json

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -74,15 +74,15 @@
7474
"targets": [
7575
{
7676
"exemplar": true,
77-
"expr": "spongix_inflated_size_local_gauge",
77+
"expr": "prometheus_spongix_inflated_size_local",
7878
"interval": "",
7979
"legendFormat": "inflated",
8080
"queryType": "randomWalk",
8181
"refId": "A"
8282
},
8383
{
8484
"exemplar": true,
85-
"expr": "spongix_chunk_size_local_gauge",
85+
"expr": "prometheus_spongix_chunk_size_local",
8686
"hide": false,
8787
"interval": "",
8888
"legendFormat": "chunks",
@@ -181,7 +181,7 @@
181181
"targets": [
182182
{
183183
"exemplar": true,
184-
"expr": "1 - (spongix_chunk_size_local_gauge / spongix_inflated_size_local_gauge)",
184+
"expr": "1 - (prometheus_spongix_chunk_size_local / prometheus_spongix_inflated_size_local)",
185185
"interval": "",
186186
"legendFormat": "{{host}}",
187187
"refId": "A"
@@ -236,7 +236,7 @@
236236
"targets": [
237237
{
238238
"exemplar": true,
239-
"expr": "spongix_max_size_local_gauge - spongix_chunk_size_local_gauge",
239+
"expr": "prometheus_spongix_max_size_local - prometheus_spongix_chunk_size_local",
240240
"interval": "",
241241
"legendFormat": "{{host}}",
242242
"refId": "A"
@@ -294,15 +294,15 @@
294294
"targets": [
295295
{
296296
"exemplar": true,
297-
"expr": "spongix_index_count_local_gauge",
297+
"expr": "prometheus_spongix_index_count_local",
298298
"interval": "",
299299
"legendFormat": "index",
300300
"queryType": "randomWalk",
301301
"refId": "A"
302302
},
303303
{
304304
"exemplar": true,
305-
"expr": "spongix_chunk_count_local_gauge",
305+
"expr": "prometheus_spongix_chunk_count_local",
306306
"hide": false,
307307
"interval": "",
308308
"legendFormat": "chunks",
@@ -399,7 +399,7 @@
399399
"targets": [
400400
{
401401
"exemplar": true,
402-
"expr": "spongix_chunk_gc_bytes_local_counter",
402+
"expr": "prometheus_spongix_chunk_gc_bytes_local",
403403
"interval": "",
404404
"legendFormat": "bytes",
405405
"queryType": "randomWalk",
@@ -498,7 +498,7 @@
498498
"targets": [
499499
{
500500
"exemplar": true,
501-
"expr": "rate(spongix_index_walk_local_counter[$__rate_interval:1h])",
501+
"expr": "rate(prometheus_spongix_index_walk_local[$__rate_interval:1h])",
502502
"interval": "",
503503
"legendFormat": "time",
504504
"queryType": "randomWalk",
@@ -597,7 +597,7 @@
597597
"targets": [
598598
{
599599
"exemplar": true,
600-
"expr": "rate(spongix_chunk_walk_local_counter[$__rate_interval:1h])",
600+
"expr": "rate(prometheus_spongix_chunk_walk_local[$__rate_interval:1h])",
601601
"interval": "",
602602
"legendFormat": "time",
603603
"queryType": "randomWalk",
@@ -696,15 +696,15 @@
696696
"targets": [
697697
{
698698
"exemplar": true,
699-
"expr": "spongix_index_gc_count_local_counter",
699+
"expr": "prometheus_spongix_index_gc_count_local",
700700
"interval": "",
701701
"legendFormat": "index",
702702
"queryType": "randomWalk",
703703
"refId": "A"
704704
},
705705
{
706706
"exemplar": true,
707-
"expr": "spongix_chunk_gc_count_local_counter",
707+
"expr": "prometheus_spongix_chunk_gc_count_local",
708708
"hide": false,
709709
"interval": "",
710710
"legendFormat": "chunks",
@@ -801,7 +801,7 @@
801801
"targets": [
802802
{
803803
"exemplar": true,
804-
"expr": "rate(spongix_gc_time_local_counter[$__rate_interval:1h])",
804+
"expr": "rate(prometheus_spongix_gc_time_local[$__rate_interval:1h])",
805805
"interval": "",
806806
"legendFormat": "time",
807807
"queryType": "randomWalk",
@@ -898,15 +898,15 @@
898898
"targets": [
899899
{
900900
"exemplar": true,
901-
"expr": "spongix_remote_cache_ok_counter",
901+
"expr": "prometheus_spongix_remote_cache_ok",
902902
"interval": "",
903903
"legendFormat": "ok",
904904
"queryType": "randomWalk",
905905
"refId": "A"
906906
},
907907
{
908908
"exemplar": true,
909-
"expr": "spongix_remote_cache_fail_counter",
909+
"expr": "prometheus_spongix_remote_cache_fail",
910910
"hide": false,
911911
"interval": "",
912912
"legendFormat": "fail",
@@ -1003,7 +1003,7 @@
10031003
"targets": [
10041004
{
10051005
"exemplar": true,
1006-
"expr": "rate(spongix_verify_time_local_counter[$__rate_interval:1h])",
1006+
"expr": "rate(prometheus_spongix_verify_time_local[$__rate_interval:1h])",
10071007
"interval": "",
10081008
"legendFormat": "time",
10091009
"queryType": "randomWalk",
@@ -1100,7 +1100,7 @@
11001100
"targets": [
11011101
{
11021102
"exemplar": true,
1103-
"expr": "spongix_chunk_dir_count_gauge",
1103+
"expr": "prometheus_spongix_chunk_dir_count",
11041104
"interval": "",
11051105
"legendFormat": "dirs",
11061106
"queryType": "randomWalk",

nix/cloud/hydrationProfile.nix

Lines changed: 47 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -194,48 +194,54 @@ in {
194194
# Observability State
195195
# --------------
196196
tf.hydrate-monitoring.configuration = {
197-
resource = inputs.bitte-cells._utils.library.mkMonitoring
198-
# Alert attrset
199-
{
200-
# Organelle local declared dashboards
201-
inherit (cell.alerts)
202-
# ci-world-alert-group-1
203-
# Upstream alerts which may have downstream deps can be imported here
204-
;
205-
# Upstream alerts not having downstream deps can be directly imported here
206-
inherit (inputs.bitte-cells.bitte.alerts)
207-
bitte-consul
208-
bitte-deadmanssnitch
209-
bitte-loki
210-
bitte-system
211-
bitte-vault
212-
bitte-vm-health
213-
bitte-vm-standalone
214-
bitte-vmagent
215-
;
216-
}
217-
# Dashboard attrset
218-
{
219-
# Organelle local declared dashboards
220-
inherit (cell.dashboards)
221-
ci-world-spongix
222-
;
197+
resource =
198+
inputs.bitte-cells._utils.library.mkMonitoring
199+
# Alert attrset
200+
{
201+
# Organelle local declared alerts
202+
inherit
203+
(cell.alerts)
204+
ci-world-spongix
205+
# Upstream alerts which may have downstream deps can be imported here
206+
207+
;
208+
# Upstream alerts not having downstream deps can be directly imported here
209+
inherit
210+
(inputs.bitte-cells.bitte.alerts)
211+
bitte-consul
212+
bitte-deadmanssnitch
213+
bitte-loki
214+
bitte-system
215+
bitte-vault
216+
bitte-vm-health
217+
bitte-vm-standalone
218+
bitte-vmagent
219+
;
220+
}
221+
# Dashboard attrset
222+
{
223+
# Organelle local declared dashboards
224+
inherit
225+
(cell.dashboards)
226+
ci-world-spongix
227+
;
223228

224-
# Upstream dashboards not having downstream deps can be directly imported here
225-
inherit (inputs.bitte-cells.bitte.dashboards)
226-
bitte-consul
227-
bitte-log
228-
bitte-loki
229-
bitte-nomad
230-
bitte-system
231-
bitte-traefik
232-
bitte-vault
233-
bitte-vmagent
234-
bitte-vmalert
235-
bitte-vm
236-
bitte-vulnix
237-
;
238-
};
229+
# Upstream dashboards not having downstream deps can be directly imported here
230+
inherit
231+
(inputs.bitte-cells.bitte.dashboards)
232+
bitte-consul
233+
bitte-log
234+
bitte-loki
235+
bitte-nomad
236+
bitte-system
237+
bitte-traefik
238+
bitte-vault
239+
bitte-vmagent
240+
bitte-vmalert
241+
bitte-vm
242+
bitte-vulnix
243+
;
244+
};
239245
};
240246

241247
# application state (terraform)

0 commit comments

Comments
 (0)