Skip to content

Commit 4258954

Browse files
committed
Refactor how RSS constructs the initial blueprint
1 parent 071b5d1 commit 4258954

File tree

4 files changed

+265
-325
lines changed

4 files changed

+265
-325
lines changed

sled-agent/src/rack_setup/plan/service.rs

Lines changed: 141 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@
44

55
//! Plan generation for "where should services be initialized".
66
7+
use anyhow::anyhow;
8+
use anyhow::bail;
9+
use chrono::Utc;
710
use iddqd::IdOrdMap;
811
use iddqd::errors::DuplicateItem;
12+
use iddqd::id_upcast;
913
use illumos_utils::zpool::ZpoolName;
1014
use internal_dns_types::config::{
1115
DnsConfigBuilder, DnsConfigParams, Host, Zone,
@@ -15,12 +19,16 @@ use nexus_sled_agent_shared::inventory::{
1519
Inventory, OmicronZoneDataset, SledRole,
1620
};
1721
use nexus_types::deployment::{
18-
BlueprintPhysicalDiskConfig, BlueprintPhysicalDiskDisposition,
22+
Blueprint, BlueprintDatasetConfig, BlueprintDatasetDisposition,
23+
BlueprintHostPhase2DesiredSlots, BlueprintPhysicalDiskConfig,
24+
BlueprintPhysicalDiskDisposition, BlueprintSledConfig, BlueprintSource,
1925
BlueprintZoneConfig, BlueprintZoneDisposition, BlueprintZoneImageSource,
20-
BlueprintZoneType, OmicronZoneExternalFloatingAddr,
21-
OmicronZoneExternalFloatingIp, OmicronZoneExternalSnatIp,
26+
BlueprintZoneType, CockroachDbPreserveDowngrade,
27+
OmicronZoneExternalFloatingAddr, OmicronZoneExternalFloatingIp,
28+
OmicronZoneExternalSnatIp, OximeterReadMode, PendingMgsUpdates,
2229
blueprint_zone_type,
2330
};
31+
use nexus_types::external_api::views::SledState;
2432
use omicron_common::address::{
2533
DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, Ipv6Subnet, MGD_PORT, MGS_PORT,
2634
NEXUS_INTERNAL_PORT, NEXUS_LOCKSTEP_PORT, NTP_PORT, NUM_SOURCE_NAT_PORTS,
@@ -46,7 +54,7 @@ use omicron_common::policy::{
4654
SINGLE_NODE_CLICKHOUSE_REDUNDANCY,
4755
};
4856
use omicron_uuid_kinds::{
49-
DatasetUuid, ExternalIpUuid, GenericUuid, OmicronZoneUuid,
57+
BlueprintUuid, DatasetUuid, ExternalIpUuid, GenericUuid, OmicronZoneUuid,
5058
PhysicalDiskUuid, SledUuid, ZpoolUuid,
5159
};
5260
use rand::seq::IndexedRandom;
@@ -58,6 +66,7 @@ use sled_agent_client::{
5866
use sled_agent_types::rack_init::RackInitializeRequest as Config;
5967
use sled_agent_types::sled::StartSledAgentRequest;
6068
use slog::Logger;
69+
use slog_error_chain::InlineErrorChain;
6170
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
6271
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV6};
6372
use std::num::Wrapping;
@@ -158,9 +167,26 @@ impl SledConfig {
158167
}
159168
}
160169

161-
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
170+
#[derive(Clone, Debug)]
171+
pub(crate) struct PlannedSledDescription {
172+
pub(crate) underlay_address: SocketAddrV6,
173+
pub(crate) sled_id: SledUuid,
174+
pub(crate) config: SledConfig,
175+
}
176+
177+
impl iddqd::IdOrdItem for PlannedSledDescription {
178+
type Key<'a> = SocketAddrV6;
179+
180+
fn key(&self) -> Self::Key<'_> {
181+
self.underlay_address
182+
}
183+
184+
id_upcast!();
185+
}
186+
187+
#[derive(Clone, Debug)]
162188
pub struct Plan {
163-
pub services: HashMap<SocketAddrV6, SledConfig>,
189+
pub all_sleds: IdOrdMap<PlannedSledDescription>,
164190
pub dns_config: DnsConfigParams,
165191
}
166192

@@ -210,29 +236,6 @@ pub fn from_source_nat_config_to_external_snat_ip(
210236
}
211237

212238
impl Plan {
213-
async fn is_sled_scrimlet(
214-
log: &Logger,
215-
address: SocketAddrV6,
216-
) -> Result<bool, PlanError> {
217-
let dur = std::time::Duration::from_secs(60);
218-
let client = reqwest::ClientBuilder::new()
219-
.connect_timeout(dur)
220-
.timeout(dur)
221-
.build()
222-
.map_err(PlanError::HttpClient)?;
223-
let client = SledAgentClient::new_with_client(
224-
&format!("http://{}", address),
225-
client,
226-
log.new(o!("SledAgentClient" => address.to_string())),
227-
);
228-
229-
let role = client.sled_role_get().await?.into_inner();
230-
match role {
231-
SledRole::Gimlet => Ok(false),
232-
SledRole::Scrimlet => Ok(true),
233-
}
234-
}
235-
236239
async fn get_inventory(
237240
log: &Logger,
238241
address: SocketAddrV6,
@@ -804,13 +807,17 @@ impl Plan {
804807
.expect("freshly generated zone IDs are unique");
805808
}
806809

807-
let services: HashMap<_, _> = sled_info
810+
let all_sleds = sled_info
808811
.into_iter()
809-
.map(|sled_info| (sled_info.sled_address, sled_info.request))
812+
.map(|sled_info| PlannedSledDescription {
813+
underlay_address: sled_info.sled_address,
814+
sled_id: sled_info.sled_id,
815+
config: sled_info.request,
816+
})
810817
.collect();
811818

812819
let dns_config = dns_builder.build_full_config_for_initial_generation();
813-
Ok(Self { services, dns_config })
820+
Ok(Self { all_sleds, dns_config })
814821
}
815822

816823
pub async fn create(
@@ -828,8 +835,10 @@ impl Plan {
828835
let sled_address = get_sled_address(subnet);
829836
let inventory =
830837
Self::get_inventory(log, sled_address).await?;
831-
let is_scrimlet =
832-
Self::is_sled_scrimlet(log, sled_address).await?;
838+
let is_scrimlet = match inventory.sled_role {
839+
SledRole::Gimlet => false,
840+
SledRole::Scrimlet => true,
841+
};
833842
Ok(SledInfo::new(
834843
sled_request.body.id,
835844
subnet,
@@ -846,6 +855,102 @@ impl Plan {
846855
let plan = Self::create_transient(config, sled_info)?;
847856
Ok(plan)
848857
}
858+
859+
pub(crate) fn to_blueprint(
860+
&self,
861+
sled_agent_config_generation: Generation,
862+
) -> anyhow::Result<Blueprint> {
863+
let mut blueprint_sleds = BTreeMap::new();
864+
for sled_description in &self.all_sleds {
865+
let sled_config = &sled_description.config;
866+
let mut datasets = IdOrdMap::new();
867+
for d in sled_config.datasets.values() {
868+
// Only the "Crucible" dataset needs to know the address
869+
let address = if *d.name.kind() == DatasetKind::Crucible {
870+
let address = sled_config.zones.iter().find_map(|z| {
871+
if let BlueprintZoneType::Crucible(
872+
blueprint_zone_type::Crucible { address, dataset },
873+
) = &z.zone_type
874+
{
875+
if &dataset.pool_name == d.name.pool() {
876+
return Some(*address);
877+
}
878+
};
879+
None
880+
});
881+
if address.is_some() {
882+
address
883+
} else {
884+
bail!(
885+
"could not find Crucible zone for zpool {}",
886+
d.name.pool()
887+
)
888+
}
889+
} else {
890+
None
891+
};
892+
893+
datasets
894+
.insert_unique(BlueprintDatasetConfig {
895+
disposition: BlueprintDatasetDisposition::InService,
896+
id: d.id,
897+
pool: *d.name.pool(),
898+
kind: d.name.kind().clone(),
899+
address,
900+
compression: d.inner.compression,
901+
quota: d.inner.quota,
902+
reservation: d.inner.reservation,
903+
})
904+
.map_err(|e| {
905+
anyhow!(InlineErrorChain::new(&e).to_string())
906+
})?;
907+
}
908+
909+
blueprint_sleds.insert(
910+
sled_description.sled_id,
911+
BlueprintSledConfig {
912+
state: SledState::Active,
913+
sled_agent_generation: sled_agent_config_generation,
914+
disks: sled_config.disks.clone(),
915+
datasets,
916+
zones: sled_config.zones.clone(),
917+
host_phase_2:
918+
BlueprintHostPhase2DesiredSlots::current_contents(),
919+
remove_mupdate_override: None,
920+
},
921+
);
922+
}
923+
924+
let id = BlueprintUuid::new_v4();
925+
Ok(Blueprint {
926+
id,
927+
sleds: blueprint_sleds,
928+
pending_mgs_updates: PendingMgsUpdates::new(),
929+
parent_blueprint_id: None,
930+
internal_dns_version: self.dns_config.generation,
931+
// We don't configure external DNS during RSS, so set it to an
932+
// initial generation of 1. Nexus will bump this up when it updates
933+
// external DNS (including creating the recovery silo).
934+
external_dns_version: Generation::new(),
935+
target_release_minimum_generation: Generation::new(),
936+
nexus_generation: Generation::new(),
937+
// Nexus will fill in the CockroachDB values during initialization.
938+
cockroachdb_fingerprint: String::new(),
939+
cockroachdb_setting_preserve_downgrade:
940+
CockroachDbPreserveDowngrade::DoNotModify,
941+
// We do not create clickhouse clusters in RSS. We create them via
942+
// reconfigurator only.
943+
clickhouse_cluster_config: None,
944+
// The oximeter read policy always defaults to single node. The
945+
// initial generation of this policy in the DB is 1
946+
oximeter_read_mode: OximeterReadMode::SingleNode,
947+
oximeter_read_version: Generation::new(),
948+
time_created: Utc::now(),
949+
creator: "RSS".to_string(),
950+
comment: "initial blueprint from rack setup".to_string(),
951+
source: BlueprintSource::Rss,
952+
})
953+
}
849954
}
850955

851956
struct AddressBumpAllocator {
@@ -1429,9 +1534,9 @@ mod tests {
14291534
let plan = Plan::create_transient(&config, sleds)
14301535
.expect("Should have created plan");
14311536

1432-
assert_eq!(plan.services.len(), 1);
1537+
assert_eq!(plan.all_sleds.len(), 1);
14331538

1434-
let sled_config = plan.services.iter().next().unwrap().1;
1539+
let sled_config = &plan.all_sleds.iter().next().unwrap().config;
14351540
assert_eq!(sled_config.disks.len(), DISK_COUNT);
14361541

14371542
let zone_count = sled_config.zones.len();

sled-agent/src/rack_setup/plan/sled.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
77
use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT;
88
use omicron_uuid_kinds::SledUuid;
9-
use schemars::JsonSchema;
10-
use serde::{Deserialize, Serialize};
119
use sled_agent_types::rack_init::RackInitializeRequest as Config;
1210
use sled_agent_types::sled::StartSledAgentRequest;
1311
use sled_agent_types::sled::StartSledAgentRequestBody;
@@ -16,7 +14,7 @@ use std::collections::{BTreeMap, BTreeSet};
1614
use std::net::{Ipv6Addr, SocketAddrV6};
1715
use uuid::Uuid;
1816

19-
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
17+
#[derive(Clone, Debug)]
2018
pub struct Plan {
2119
pub rack_id: Uuid,
2220
pub sleds: BTreeMap<SocketAddrV6, StartSledAgentRequest>,
@@ -63,8 +61,6 @@ impl Plan {
6361
)
6462
});
6563

66-
info!(log, "Serializing plan");
67-
6864
let mut sleds = BTreeMap::new();
6965
for (addr, allocation) in allocations {
7066
sleds.insert(addr, allocation);

0 commit comments

Comments
 (0)