Skip to content

Commit 46b66b4

Browse files
committed
draw the rest of the background task
1 parent 89483c0 commit 46b66b4

File tree

17 files changed

+500
-44
lines changed

17 files changed

+500
-44
lines changed

dev-tools/omdb/src/bin/omdb/db.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1309,7 +1309,7 @@ impl DbArgs {
13091309
sitrep::cmd_db_sitrep(&opctx, &datastore, &fetch_opts, args).await
13101310
}
13111311
DbCommands::Sitreps(args) => {
1312-
sitrep::cmd_db_sitrep_history(&datastore, &fetch_opts, args).await
1312+
sitrep::cmd_db_sitrep_history(&opctx, &datastore, &fetch_opts, args).await
13131313
}
13141314
DbCommands::Sleds(args) => {
13151315
cmd_db_sleds(&opctx, &datastore, &fetch_opts, args).await

dev-tools/omdb/src/bin/omdb/db/sitrep.rs

Lines changed: 42 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
use crate::db::DbFetchOptions;
88
use crate::db::check_limit;
99
use crate::helpers::const_max_len;
10-
use crate::helpers::datetime_rfc3339_concise;
10+
use crate::helpers::datetime_opt_rfc3339_concise;
11+
use crate::helpers::display_option_blank;
1112
use anyhow::Context;
1213
use async_bb8_diesel::AsyncRunQueryDsl;
1314
use chrono::{DateTime, Utc};
@@ -17,7 +18,6 @@ use diesel::prelude::*;
1718
use nexus_db_queries::context::OpContext;
1819
use nexus_db_queries::db::DataStore;
1920
use nexus_db_queries::db::model;
20-
use nexus_db_queries::db::pagination::paginated;
2121
use nexus_types::fm;
2222
use omicron_common::api::external::DataPageParams;
2323
use omicron_common::api::external::PaginationOrder;
@@ -26,7 +26,6 @@ use omicron_uuid_kinds::SitrepUuid;
2626
use tabled::Tabled;
2727
use uuid::Uuid;
2828

29-
use nexus_db_schema::schema::fm_sitrep::dsl as sitrep_dsl;
3029
use nexus_db_schema::schema::fm_sitrep_history::dsl as history_dsl;
3130
use nexus_db_schema::schema::inv_collection::dsl as inv_collection_dsl;
3231

@@ -100,7 +99,7 @@ pub(super) async fn cmd_db_sitrep(
10099
) -> anyhow::Result<()> {
101100
match args.command {
102101
Commands::History(ref args) => {
103-
cmd_db_sitrep_history(datastore, fetch_opts, args).await
102+
cmd_db_sitrep_history(opctx, datastore, fetch_opts, args).await
104103
}
105104
Commands::Info { sitrep, ref args } => {
106105
cmd_db_sitrep_show(opctx, datastore, fetch_opts, args, sitrep).await
@@ -119,6 +118,7 @@ pub(super) async fn cmd_db_sitrep(
119118
}
120119

121120
pub(super) async fn cmd_db_sitrep_history(
121+
opctx: &OpContext,
122122
datastore: &DataStore,
123123
fetch_opts: &DbFetchOptions,
124124
args: &SitrepHistoryArgs,
@@ -138,53 +138,58 @@ pub(super) async fn cmd_db_sitrep_history(
138138
struct SitrepRow {
139139
v: u32,
140140
id: Uuid,
141-
#[tabled(display_with = "datetime_rfc3339_concise")]
142-
created_at: DateTime<Utc>,
141+
#[tabled(display_with = "display_option_blank")]
142+
orphans: Option<usize>,
143+
#[tabled(display_with = "datetime_opt_rfc3339_concise")]
144+
created_at: Option<DateTime<Utc>>,
143145
comment: String,
144146
}
145147

146-
let conn = datastore.pool_connection_for_tests().await?;
147148
let marker = args.from.map(model::SqlU32::new);
148149
let pagparams = DataPageParams {
149150
marker: marker.as_ref(),
150151
direction: PaginationOrder::Descending,
151152
limit: fetch_opts.fetch_limit,
152153
};
153-
let sitreps: Vec<(model::SitrepVersion, model::SitrepMetadata)> =
154-
paginated(
155-
history_dsl::fm_sitrep_history,
156-
history_dsl::version,
157-
&pagparams,
158-
)
159-
.inner_join(
160-
sitrep_dsl::fm_sitrep.on(history_dsl::sitrep_id.eq(sitrep_dsl::id)),
161-
)
162-
.select((
163-
model::SitrepVersion::as_select(),
164-
model::SitrepMetadata::as_select(),
165-
))
166-
.load_async(&*conn)
154+
let versions = datastore
155+
.fm_sitrep_version_list(&opctx, &pagparams)
167156
.await
168157
.with_context(ctx)?;
169158

170-
check_limit(&sitreps, fetch_opts.fetch_limit, ctx);
171-
172-
let rows = sitreps.into_iter().map(|(version, metadata)| {
173-
let model::SitrepMetadata {
174-
id,
175-
time_created,
176-
comment,
177-
creator_id: _,
178-
parent_sitrep_id: _,
179-
inv_collection_id: _,
180-
} = metadata;
181-
SitrepRow {
182-
v: version.version.into(),
183-
id: id.into_untyped_uuid(),
159+
check_limit(&versions, fetch_opts.fetch_limit, ctx);
160+
161+
let mut rows = Vec::with_capacity(versions.len());
162+
for v in versions {
163+
let orphans = match datastore.fm_sitrep_list_orphaned(&opctx, &v).await
164+
{
165+
Ok(o) => Some(o.len()),
166+
Err(e) => {
167+
eprintln!(
168+
"failed to list orphaned sitreps at v{}: {e}",
169+
v.version
170+
);
171+
None
172+
}
173+
};
174+
let (comment, time_created) =
175+
match datastore.fm_sitrep_metadata_read(&opctx, v.id).await {
176+
Ok(s) => (s.comment, Some(s.time_created)),
177+
Err(e) => {
178+
eprintln!(
179+
"failed to get fetch metadata for sitrep {} (v{}): {e}",
180+
v.id, v.version
181+
);
182+
("<ERROR>".to_string(), None)
183+
}
184+
};
185+
rows.push(SitrepRow {
186+
v: v.version,
187+
id: v.id.into_untyped_uuid(),
188+
orphans,
184189
created_at: time_created,
185190
comment,
186-
}
187-
});
191+
});
192+
}
188193

189194
let table = tabled::Table::new(rows)
190195
.with(tabled::settings::Style::empty())

dev-tools/omdb/src/bin/omdb/nexus.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,15 @@ use nexus_types::internal_api::background::InstanceReincarnationStatus;
6060
use nexus_types::internal_api::background::InstanceUpdaterStatus;
6161
use nexus_types::internal_api::background::InventoryLoadStatus;
6262
use nexus_types::internal_api::background::LookupRegionPortStatus;
63+
use nexus_types::internal_api::background::OrphanedSitreps;
6364
use nexus_types::internal_api::background::ReadOnlyRegionReplacementStartStatus;
6465
use nexus_types::internal_api::background::RegionReplacementDriverStatus;
6566
use nexus_types::internal_api::background::RegionReplacementStatus;
6667
use nexus_types::internal_api::background::RegionSnapshotReplacementFinishStatus;
6768
use nexus_types::internal_api::background::RegionSnapshotReplacementGarbageCollectStatus;
6869
use nexus_types::internal_api::background::RegionSnapshotReplacementStartStatus;
6970
use nexus_types::internal_api::background::RegionSnapshotReplacementStepStatus;
71+
use nexus_types::internal_api::background::SitrepGcStatus;
7072
use nexus_types::internal_api::background::SitrepLoadStatus;
7173
use nexus_types::internal_api::background::SupportBundleCleanupReport;
7274
use nexus_types::internal_api::background::SupportBundleCollectionReport;
@@ -1240,6 +1242,9 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
12401242
"fm_sitrep_loader" => {
12411243
print_task_fm_sitrep_loader(details);
12421244
}
1245+
"fm_sitrep_gc" => {
1246+
print_task_fm_sitrep_gc(details);
1247+
}
12431248
_ => {
12441249
println!(
12451250
"warning: unknown background task: {:?} \
@@ -3142,6 +3147,75 @@ fn print_task_fm_sitrep_loader(details: &serde_json::Value) {
31423147
};
31433148
}
31443149

3150+
fn print_task_fm_sitrep_gc(details: &serde_json::Value) {
3151+
let SitrepGcStatus { versions_scanned, orphaned_sitreps, errors } =
3152+
match serde_json::from_value::<SitrepGcStatus>(details.clone()) {
3153+
Err(error) => {
3154+
eprintln!(
3155+
"warning: failed to interpret task details: {:?}: {:?}",
3156+
error, details
3157+
);
3158+
return;
3159+
}
3160+
Ok(status) => status,
3161+
};
3162+
3163+
pub const TOTAL_VERSIONS_SCANNED: &str = "total versions scanned:";
3164+
pub const VERSIONS_WITH_ORPHANS: &str = "versions with orphans:";
3165+
pub const ORPHANS_FOUND: &str = "total orphaned sitreps found:";
3166+
pub const ORPHANS_DELETED: &str = "total orphaned sitreps deleted:";
3167+
pub const ERRORS: &str = "errors:";
3168+
pub const WIDTH: usize = const_max_len(&[
3169+
TOTAL_VERSIONS_SCANNED,
3170+
VERSIONS_WITH_ORPHANS,
3171+
ERRORS,
3172+
ORPHANS_FOUND,
3173+
ORPHANS_DELETED,
3174+
]) + 1;
3175+
pub const NUM_WIDTH: usize = 4;
3176+
3177+
#[derive(tabled::Tabled)]
3178+
struct OrphanedSitrepsRow {
3179+
version: u32,
3180+
found: usize,
3181+
deleted: usize,
3182+
}
3183+
let mut total_found = 0;
3184+
let mut total_deleted = 0;
3185+
let rows: Vec<_> = orphaned_sitreps
3186+
.iter()
3187+
.map(|(&version, &OrphanedSitreps { found, deleted })| {
3188+
total_found += found;
3189+
total_deleted += deleted;
3190+
OrphanedSitrepsRow { version, found, deleted }
3191+
})
3192+
.collect();
3193+
3194+
if !errors.is_empty() {
3195+
println!("{ERRICON} {ERRORS:<WIDTH$}{:>NUM_WIDTH$}", errors.len());
3196+
for error in errors {
3197+
println!(" > {error}")
3198+
}
3199+
}
3200+
println!(
3201+
" {TOTAL_VERSIONS_SCANNED:<WIDTH$}{versions_scanned:>NUM_WIDTH$}"
3202+
);
3203+
println!(
3204+
" {VERSIONS_WITH_ORPHANS:<WIDTH$}{:>NUM_WIDTH$}",
3205+
orphaned_sitreps.len()
3206+
);
3207+
println!(" {ORPHANS_FOUND:<WIDTH$}{total_found:>NUM_WIDTH$}");
3208+
println!(" {ORPHANS_DELETED:<WIDTH$}{total_deleted:>NUM_WIDTH$}");
3209+
3210+
// Don't print the table listing the number of GC'd sitreps per version
3211+
// if it's empty, 'cause it looks kinda weird.
3212+
if !rows.is_empty() {
3213+
let mut table = tabled::Table::new(rows);
3214+
bgtask_apply_kv_style(&mut table);
3215+
println!("{table}");
3216+
}
3217+
}
3218+
31453219
const ERRICON: &str = "/!\\";
31463220

31473221
fn warn_if_nonzero(n: usize) -> &'static str {

dev-tools/omdb/tests/env.out

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,10 @@ task: "external_endpoints"
9999
on each one
100100

101101

102+
task: "fm_sitrep_gc"
103+
garbage collects fault management situation reports
104+
105+
102106
task: "fm_sitrep_loader"
103107
loads the current fault management situation report from the database
104108

@@ -319,6 +323,10 @@ task: "external_endpoints"
319323
on each one
320324

321325

326+
task: "fm_sitrep_gc"
327+
garbage collects fault management situation reports
328+
329+
322330
task: "fm_sitrep_loader"
323331
loads the current fault management situation report from the database
324332

@@ -526,6 +534,10 @@ task: "external_endpoints"
526534
on each one
527535

528536

537+
task: "fm_sitrep_gc"
538+
garbage collects fault management situation reports
539+
540+
529541
task: "fm_sitrep_loader"
530542
loads the current fault management situation report from the database
531543

dev-tools/omdb/tests/successes.out

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,10 @@ task: "external_endpoints"
334334
on each one
335335

336336

337+
task: "fm_sitrep_gc"
338+
garbage collects fault management situation reports
339+
340+
337341
task: "fm_sitrep_loader"
338342
loads the current fault management situation report from the database
339343

@@ -619,6 +623,15 @@ task: "external_endpoints"
619623

620624
TLS certificates: 0
621625

626+
task: "fm_sitrep_gc"
627+
configured period: every <REDACTED_DURATION>s
628+
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
629+
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
630+
total versions scanned: 0
631+
versions with orphans: 0
632+
total orphaned sitreps found: 0
633+
total orphaned sitreps deleted: 0
634+
622635
task: "fm_sitrep_loader"
623636
configured period: every <REDACTED_DURATION>s
624637
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
@@ -1166,6 +1179,15 @@ task: "external_endpoints"
11661179

11671180
TLS certificates: 0
11681181

1182+
task: "fm_sitrep_gc"
1183+
configured period: every <REDACTED_DURATION>s
1184+
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
1185+
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
1186+
total versions scanned: 0
1187+
versions with orphans: 0
1188+
total orphaned sitreps found: 0
1189+
total orphaned sitreps deleted: 0
1190+
11691191
task: "fm_sitrep_loader"
11701192
configured period: every <REDACTED_DURATION>s
11711193
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>

nexus-config/src/nexus_config.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -879,11 +879,21 @@ pub struct FmTasksConfig {
879879
/// reads the latest fault management sitrep from the database.
880880
#[serde_as(as = "DurationSeconds<u64>")]
881881
pub sitrep_load_period_secs: Duration,
882+
/// period (in seconds) for periodic activations of the background task that
883+
/// garbage collects unneeded fault management sitreps in the database.
884+
#[serde_as(as = "DurationSeconds<u64>")]
885+
pub sitrep_gc_period_secs: Duration,
882886
}
883887

884888
impl Default for FmTasksConfig {
885889
fn default() -> Self {
886-
Self { sitrep_load_period_secs: Duration::from_secs(15) }
890+
Self {
891+
sitrep_load_period_secs: Duration::from_secs(15),
892+
// This need not be activated very frequently, as it's triggered any
893+
// time the current sitrep changes, and activating it more
894+
// frequently won't make things more responsive.
895+
sitrep_gc_period_secs: Duration::from_secs(600),
896+
}
887897
}
888898
}
889899

@@ -1190,6 +1200,7 @@ mod test {
11901200
webhook_deliverator.second_retry_backoff_secs = 46
11911201
sp_ereport_ingester.period_secs = 47
11921202
fm.sitrep_load_period_secs = 48
1203+
fm.sitrep_gc_period_secs = 49
11931204
[default_region_allocation_strategy]
11941205
type = "random"
11951206
seed = 0
@@ -1436,6 +1447,7 @@ mod test {
14361447
},
14371448
fm: FmTasksConfig {
14381449
sitrep_load_period_secs: Duration::from_secs(48),
1450+
sitrep_gc_period_secs: Duration::from_secs(49),
14391451
}
14401452
},
14411453
default_region_allocation_strategy:
@@ -1536,6 +1548,7 @@ mod test {
15361548
webhook_deliverator.period_secs = 43
15371549
sp_ereport_ingester.period_secs = 44
15381550
fm.sitrep_load_period_secs = 45
1551+
fm.sitrep_gc_period_secs = 46
15391552
15401553
[default_region_allocation_strategy]
15411554
type = "random"

nexus/background-task-interface/src/init.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ pub struct BackgroundTasks {
5252
pub task_sp_ereport_ingester: Activator,
5353
pub task_reconfigurator_config_loader: Activator,
5454
pub task_fm_sitrep_loader: Activator,
55+
pub task_fm_sitrep_gc: Activator,
5556

5657
// Handles to activate background tasks that do not get used by Nexus
5758
// at-large. These background tasks are implementation details as far as

0 commit comments

Comments
 (0)