Skip to content

Commit 7b9ed23

Browse files
committed
imp: add darwin alerts and signers
1 parent 12d8791 commit 7b9ed23

File tree

4 files changed

+83
-6
lines changed

4 files changed

+83
-6
lines changed

flake.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nix/cloud/alerts.nix

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,24 @@
22
inputs,
33
cell,
44
}: {
5+
ci-world-darwin = {
6+
datasource = "vm";
7+
rules = [
8+
{
9+
alert = "DarwinSshFailure";
10+
expr = ''probe_success{job="blackbox-ssh-darwin"} == 0'';
11+
for = "5m";
12+
labels.severity = "critical";
13+
annotations = {
14+
description = ''
15+
Cluster ssh connectivity to darwin builder {{ $labels.alias }} at {{ $labels.instance }}
16+
has been down for more than 5 minutes. Darwin CI capacity is degraded or down.'';
17+
summary = "Connectivity to Darwin builder {{ $labels.alias }} is down";
18+
};
19+
}
20+
];
21+
};
22+
523
ci-world-spongix = {
624
datasource = "vm";
725
rules = [

nix/cloud/hydrationProfile.nix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ in {
204204
# Cell Block local declared alerts
205205
inherit
206206
(cell.alerts)
207+
ci-world-darwin
207208
ci-world-spongix
208209
ci-world-nomad-follower
209210
# Upstream alerts which may have downstream deps can be imported here

nix/metal/bitteProfile/default.nix

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,65 @@ in {
169169
subnet = cluster.vpc.subnets.core-1;
170170
volumeSize = 300;
171171

172-
modules = [bitte.profiles.monitoring];
172+
modules = [
173+
bitte.profiles.monitoring
174+
({lib, ...}: {
175+
services.prometheus.exporters.blackbox = lib.mkForce {
176+
enable = true;
177+
configFile = pkgs.toPrettyJSON "blackbox-exporter.yaml" {
178+
modules = {
179+
ssh_banner = {
180+
prober = "tcp";
181+
timeout = "10s";
182+
tcp = {
183+
preferred_ip_protocol = "ip4";
184+
query_response = [
185+
{
186+
expect = "^SSH-2.0-";
187+
send = "SSH-2.0-blackbox-ssh-check";
188+
}
189+
];
190+
};
191+
};
192+
};
193+
};
194+
};
195+
196+
services.vmagent.promscrapeConfig = let
197+
mkTarget = ip: machine: {
198+
targets = ["${ip}:22"];
199+
labels.alias = machine;
200+
};
201+
in [
202+
{
203+
job_name = "blackbox-ssh-darwin";
204+
scrape_interval = "60s";
205+
metrics_path = "/probe";
206+
params.module = ["ssh_banner"];
207+
static_configs = [
208+
(mkTarget "10.10.0.1" "mm1-builder")
209+
(mkTarget "10.10.0.2" "mm2-builder")
210+
(mkTarget "10.10.0.101" "mm1-signer")
211+
(mkTarget "10.10.0.102" "mm2-signer")
212+
];
213+
relabel_configs = [
214+
{
215+
source_labels = ["__address__"];
216+
target_label = "__param_target";
217+
}
218+
{
219+
source_labels = ["__param_target"];
220+
target_label = "instance";
221+
}
222+
{
223+
replacement = "127.0.0.1:9115";
224+
target_label = "__address__";
225+
}
226+
];
227+
}
228+
];
229+
})
230+
];
173231

174232
securityGroupRules = {
175233
inherit
@@ -278,13 +336,13 @@ in {
278336
# mm1
279337
{
280338
publicKey = "nvKCarVUXdO0WtoDsEjTzU+bX0bwWYHJAM2Y3XhO0Ao=";
281-
allowedIPs = ["10.10.0.1/32"];
339+
allowedIPs = ["10.10.0.1/32" "10.10.0.101/32"];
282340
persistentKeepalive = 30;
283341
}
284342
# mm2
285343
{
286344
publicKey = "VcOEVp/0EG4luwL2bMmvGvlDNDbCzk7Vkazd3RRl51w=";
287-
allowedIPs = ["10.10.0.2/32"];
345+
allowedIPs = ["10.10.0.2/32" "10.10.0.102/32"];
288346
persistentKeepalive = 30;
289347
}
290348
];

0 commit comments

Comments
 (0)