From 173c6ce23b87771534c6c9d1e3dad1bf58b5627f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 5 May 2023 01:29:59 -0400 Subject: [PATCH 1/2] [sled-agent] Vectorize the call to 'create datasets' --- openapi/sled-agent.json | 30 +++++--- sled-agent/src/http_entrypoints.rs | 14 +--- sled-agent/src/params.rs | 23 +++--- sled-agent/src/rack_setup/plan/service.rs | 92 +++++++++++++---------- sled-agent/src/rack_setup/service.rs | 55 +++++++------- sled-agent/src/sled_agent.rs | 83 +++++++++++--------- sled-agent/src/storage_manager.rs | 35 ++++----- 7 files changed, 183 insertions(+), 149 deletions(-) diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index b836658cdf5..ccb0eae4c67 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -56,7 +56,7 @@ }, "/filesystem": { "put": { - "operationId": "filesystem_put", + "operationId": "filesystems_put", "requestBody": { "content": { "application/json": { @@ -609,14 +609,29 @@ ] }, "DatasetEnsureBody": { + "description": "Used to request that the Sled initialize multiple datasets.", + "type": "object", + "properties": { + "datasets": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DatasetEnsureRequest" + } + } + }, + "required": [ + "datasets" + ] + }, + "DatasetEnsureRequest": { "description": "Used to request a new dataset kind exists within a zpool.\n\nMany dataset types are associated with services that will be instantiated when the dataset is detected.", "type": "object", "properties": { "address": { "type": "string" }, - "dataset_kind": { - "$ref": "#/components/schemas/DatasetKind" + "dataset_name": { + "$ref": "#/components/schemas/DatasetName" }, "gz_address": { "nullable": true, @@ -627,17 +642,12 @@ "id": { "type": "string", "format": "uuid" - }, - "zpool_id": { - "type": "string", - "format": "uuid" } }, "required": [ "address", - "dataset_kind", - "id", - "zpool_id" + "dataset_name", + "id" ] }, "DatasetKind": { diff --git a/sled-agent/src/http_entrypoints.rs 
b/sled-agent/src/http_entrypoints.rs index 3c82fbaf590..e38b41d9685 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -31,7 +31,7 @@ type SledApiDescription = ApiDescription; pub fn api() -> SledApiDescription { fn register_endpoints(api: &mut SledApiDescription) -> Result<(), String> { api.register(disk_put)?; - api.register(filesystem_put)?; + api.register(filesystems_put)?; api.register(instance_issue_disk_snapshot_request)?; api.register(instance_put_migration_ids)?; api.register(instance_put_state)?; @@ -96,21 +96,13 @@ async fn sled_role_get( method = PUT, path = "/filesystem", }] -async fn filesystem_put( +async fn filesystems_put( rqctx: RequestContext, body: TypedBody, ) -> Result { let sa = rqctx.context(); let body_args = body.into_inner(); - sa.filesystem_ensure( - body_args.id, - body_args.zpool_id, - body_args.dataset_kind, - body_args.address, - body_args.gz_address, - ) - .await - .map_err(|e| Error::from(e))?; + sa.filesystems_ensure(body_args).await.map_err(|e| Error::from(e))?; Ok(HttpResponseUpdatedNoContent()) } diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 972e95e58bf..08c9f18cc25 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -311,18 +311,22 @@ impl std::fmt::Display for DatasetKind { } } +/// Used to request that the Sled initialize multiple datasets. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct DatasetEnsureBody { + pub datasets: Vec, +} + /// Used to request a new dataset kind exists within a zpool. /// /// Many dataset types are associated with services that will be /// instantiated when the dataset is detected. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct DatasetEnsureBody { +pub struct DatasetEnsureRequest { // The UUID of the dataset, as well as the service using it directly. pub id: Uuid, // The name (and UUID) of the Zpool which we are inserting into. 
- pub zpool_id: Uuid, - // The type of the filesystem. - pub dataset_kind: DatasetKind, + pub dataset_name: crate::storage::dataset::DatasetName, // The address on which the zone will listen for requests. pub address: SocketAddrV6, // The addresses in the global zone which should be created, if necessary @@ -331,14 +335,15 @@ pub struct DatasetEnsureBody { pub gz_address: Option, } -impl From for sled_agent_client::types::DatasetEnsureBody { - fn from(p: DatasetEnsureBody) -> Self { +impl From + for sled_agent_client::types::DatasetEnsureRequest +{ + fn from(p: DatasetEnsureRequest) -> Self { Self { - zpool_id: p.zpool_id, - dataset_kind: p.dataset_kind.into(), + id: p.id, + dataset_name: p.dataset_name.into(), address: p.address.to_string(), gz_address: p.gz_address, - id: p.id, } } } diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index ff57a944a2c..b127d560ad8 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -7,13 +7,15 @@ use crate::bootstrap::params::SledAgentRequest; use crate::ledger::{Ledger, Ledgerable}; use crate::params::{ - DatasetEnsureBody, ServiceType, ServiceZoneRequest, ServiceZoneService, + DatasetEnsureRequest, ServiceType, ServiceZoneRequest, ServiceZoneService, ZoneType, }; use crate::rack_setup::config::SetupServiceConfig as Config; +use crate::storage::dataset::DatasetName; use crate::storage_manager::StorageResources; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; +use illumos_utils::zpool::ZpoolName; use internal_dns::{ServiceName, DNS_ZONE}; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet, @@ -93,7 +95,7 @@ pub enum PlanError { pub struct SledRequest { /// Datasets to be created. #[serde(default, rename = "dataset")] - pub datasets: Vec, + pub datasets: Vec, /// Services to be instantiated. 
#[serde(default, rename = "service")] @@ -165,7 +167,7 @@ impl Plan { async fn get_u2_zpools_from_sled( log: &Logger, address: SocketAddrV6, - ) -> Result, PlanError> { + ) -> Result, PlanError> { let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() .connect_timeout(dur) @@ -179,7 +181,7 @@ impl Plan { ); let get_u2_zpools = || async { - let zpools: Vec = client + let zpools: Vec = client .zpools_get() .await .map(|response| { @@ -187,7 +189,9 @@ impl Plan { .into_inner() .into_iter() .filter_map(|zpool| match zpool.disk_type { - SledAgentTypes::DiskType::U2 => Some(zpool.id), + SledAgentTypes::DiskType::U2 => { + Some(ZpoolName::new_external(zpool.id)) + } SledAgentTypes::DiskType::M2 => None, }) .collect() @@ -298,19 +302,21 @@ impl Plan { .unwrap(); let (nic, external_ip) = svc_port_builder.next_dns(id, &mut services_ip_pool)?; - request.datasets.push(DatasetEnsureBody { + request.datasets.push(DatasetEnsureRequest { id, - zpool_id: u2_zpools[0], - dataset_kind: crate::params::DatasetKind::ExternalDns { - http_address: SocketAddrV6::new( - internal_ip, - http_port, - 0, - 0, - ), - dns_address: SocketAddr::new(external_ip, dns_port), - nic, - }, + dataset_name: DatasetName::new( + u2_zpools[0].clone(), + crate::params::DatasetKind::ExternalDns { + http_address: SocketAddrV6::new( + internal_ip, + http_port, + 0, + 0, + ), + dns_address: SocketAddr::new(external_ip, dns_port), + nic, + }, + ), address: http_address, gz_address: None, }); @@ -384,10 +390,12 @@ impl Plan { dns_builder .service_backend_zone(ServiceName::Cockroach, &zone, port) .unwrap(); - request.datasets.push(DatasetEnsureBody { + request.datasets.push(DatasetEnsureRequest { id, - zpool_id: u2_zpools[0], - dataset_kind: crate::params::DatasetKind::CockroachDb, + dataset_name: DatasetName::new( + u2_zpools[0].clone(), + crate::params::DatasetKind::CockroachDb, + ), address, gz_address: None, }); @@ -403,10 +411,12 @@ impl Plan { dns_builder 
.service_backend_zone(ServiceName::Clickhouse, &zone, port) .unwrap(); - request.datasets.push(DatasetEnsureBody { + request.datasets.push(DatasetEnsureRequest { id, - zpool_id: u2_zpools[0], - dataset_kind: crate::params::DatasetKind::Clickhouse, + dataset_name: DatasetName::new( + u2_zpools[0].clone(), + crate::params::DatasetKind::Clickhouse, + ), address, gz_address: None, }); @@ -415,7 +425,7 @@ impl Plan { // Each zpool gets a crucible zone. // // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove - for zpool_id in &u2_zpools { + for pool in &u2_zpools { let ip = addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::CRUCIBLE_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); @@ -429,10 +439,12 @@ impl Plan { ) .unwrap(); - request.datasets.push(DatasetEnsureBody { + request.datasets.push(DatasetEnsureRequest { id, - zpool_id: *zpool_id, - dataset_kind: crate::params::DatasetKind::Crucible, + dataset_name: DatasetName::new( + pool.clone(), + crate::params::DatasetKind::Crucible, + ), address, gz_address: None, }); @@ -454,18 +466,22 @@ impl Plan { DNS_HTTP_PORT, ) .unwrap(); - request.datasets.push(DatasetEnsureBody { + request.datasets.push(DatasetEnsureRequest { id, - zpool_id: u2_zpools[0], - dataset_kind: crate::params::DatasetKind::InternalDns { - http_address: SocketAddrV6::new( - dns_ip, - DNS_HTTP_PORT, - 0, - 0, - ), - dns_address: SocketAddrV6::new(dns_ip, DNS_PORT, 0, 0), - }, + dataset_name: DatasetName::new( + u2_zpools[0].clone(), + crate::params::DatasetKind::InternalDns { + http_address: SocketAddrV6::new( + dns_ip, + DNS_HTTP_PORT, + 0, + 0, + ), + dns_address: SocketAddrV6::new( + dns_ip, DNS_PORT, 0, 0, + ), + }, + ), address: http_address, gz_address: Some(dns_subnet.gz_address().ip()), }); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index f2e6666bc3f..98bd0d4f1fc 100644 --- a/sled-agent/src/rack_setup/service.rs +++ 
b/sled-agent/src/rack_setup/service.rs @@ -62,7 +62,7 @@ use crate::bootstrap::rss_handle::BootstrapAgentHandle; use crate::ledger::{Ledger, Ledgerable}; use crate::nexus::d2n_params; use crate::params::{ - AutonomousServiceOnlyError, DatasetEnsureBody, DatasetKind, ServiceType, + AutonomousServiceOnlyError, DatasetEnsureRequest, DatasetKind, ServiceType, ServiceZoneRequest, TimeSync, ZoneType, }; use crate::rack_setup::plan::service::{ @@ -246,7 +246,7 @@ impl ServiceInner { async fn initialize_datasets( &self, sled_address: SocketAddrV6, - datasets: &Vec, + datasets: &Vec, ) -> Result<(), SetupServiceError> { let dur = std::time::Duration::from_secs(60); @@ -261,26 +261,29 @@ impl ServiceInner { self.log.new(o!("SledAgentClient" => sled_address.to_string())), ); + let datasets = + datasets.iter().map(|d| d.clone().into()).collect::>(); + info!(self.log, "sending dataset requests..."); - for dataset in datasets { - let filesystem_put = || async { - info!(self.log, "creating new filesystem: {:?}", dataset); - client - .filesystem_put(&dataset.clone().into()) - .await - .map_err(BackoffError::transient)?; - Ok::<(), BackoffError>>(()) - }; - let log_failure = |error, _| { - warn!(self.log, "failed to create filesystem"; "error" => ?error); - }; - retry_notify( - retry_policy_internal_service_aggressive(), - filesystem_put, - log_failure, - ) - .await?; - } + let filesystem_put = || async { + info!(self.log, "creating new filesystems: {:?}", datasets); + client + .filesystems_put(&SledAgentTypes::DatasetEnsureBody { + datasets: datasets.clone(), + }) + .await + .map_err(BackoffError::transient)?; + Ok::<(), BackoffError>>(()) + }; + let log_failure = |error, _| { + warn!(self.log, "failed to create filesystem"; "error" => ?error); + }; + retry_notify( + retry_policy_internal_service_aggressive(), + filesystem_put, + log_failure, + ) + .await?; Ok(()) } @@ -348,7 +351,7 @@ impl ServiceInner { .iter() .filter_map(|dataset| { if matches!( - dataset.dataset_kind, + 
dataset.dataset_name.dataset(), DatasetKind::InternalDns { .. } ) { Some(dataset.clone()) @@ -378,8 +381,8 @@ impl ServiceInner { .datasets .iter() .filter_map(|dataset| { - match dataset.dataset_kind { - DatasetKind::InternalDns { http_address, .. } => Some(http_address), + match dataset.dataset_name.dataset() { + DatasetKind::InternalDns { http_address, .. } => Some(http_address.clone()), _ => None, } }) @@ -703,11 +706,11 @@ impl ServiceInner { for dataset in service_request.datasets.iter() { datasets.push(NexusTypes::DatasetCreateRequest { - zpool_id: dataset.zpool_id, + zpool_id: dataset.dataset_name.pool().id(), dataset_id: dataset.id, request: NexusTypes::DatasetPutRequest { address: dataset.address.to_string(), - kind: dataset.dataset_kind.clone().into(), + kind: dataset.dataset_name.dataset().clone().into(), }, }) } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 0b337d14894..8ae7c146581 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -9,7 +9,7 @@ use crate::config::Config; use crate::instance_manager::InstanceManager; use crate::nexus::{LazyNexusClient, NexusRequestQueue}; use crate::params::{ - DatasetKind, DiskStateRequested, InstanceHardware, + DatasetEnsureBody, DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, InstanceUnregisterResponse, ServiceEnsureBody, ServiceZoneService, SledRole, TimeSync, VpcFirewallRule, Zpool, @@ -510,45 +510,56 @@ impl SledAgent { } } - /// Ensures that a filesystem type exists within the zpool. - pub async fn filesystem_ensure( + /// Ensures that the requested filesystems exist within their zpools.
+ pub async fn filesystems_ensure( &self, - dataset_id: Uuid, - zpool_id: Uuid, - dataset_kind: DatasetKind, - address: SocketAddrV6, - gz_address: Option, + requested_datasets: DatasetEnsureBody, ) -> Result<(), Error> { - // First, ensure the dataset exists - let dataset = self - .inner - .storage - .upsert_filesystem(dataset_id, zpool_id, dataset_kind.clone()) - .await?; + // TODO: + // - If these are the set of filesystems, we should also consider + // removing the ones which are not listed here. + // - It's probably worth sending a bulk request to the storage system, + // rather than requesting individual datasets. + for dataset in &requested_datasets.datasets { + let dataset_id = dataset.id; + + // First, ensure the dataset exists + self.inner + .storage + .upsert_filesystem(dataset_id, dataset.dataset_name.clone()) + .await?; + } - // NOTE: We use the "dataset_id" as the "service_id" here. - // - // Since datasets are tightly coupled with their own services - e.g., - // from the perspective of Nexus, provisioning a dataset implies the - // sled should start a service - this is ID re-use is reasonable. - // - // If Nexus ever wants sleds to provision datasets independently of - // launching services, this ID type overlap should be reconsidered. - let service_type = dataset_kind.service_type(); - let services = - vec![ServiceZoneService { id: dataset_id, details: service_type }]; - - // Next, ensure a zone exists to manage storage for that dataset - let request = crate::params::ServiceZoneRequest { - id: dataset_id, - zone_type: dataset_kind.zone_type(), - addresses: vec![*address.ip()], - dataset: Some(dataset), - gz_addresses: gz_address.into_iter().collect(), - services, - }; - self.inner.services.ensure_storage_service(request).await?; + for dataset in &requested_datasets.datasets { + let dataset_id = dataset.id; + let address = dataset.address; + let gz_address = dataset.gz_address; + // NOTE: We use the "dataset_id" as the "service_id" here. 
+ // + // Since datasets are tightly coupled with their own services - e.g., + // from the perspective of Nexus, provisioning a dataset implies the + // sled should start a service - this ID re-use is reasonable. + // + // If Nexus ever wants sleds to provision datasets independently of + // launching services, this ID type overlap should be reconsidered. + let service_type = dataset.dataset_name.dataset().service_type(); + let services = vec![ServiceZoneService { + id: dataset_id, + details: service_type, + }]; + + // Next, ensure a zone exists to manage storage for that dataset + let request = crate::params::ServiceZoneRequest { + id: dataset_id, + zone_type: dataset.dataset_name.dataset().zone_type(), + addresses: vec![*address.ip()], + dataset: Some(dataset.dataset_name.clone()), + gz_addresses: gz_address.into_iter().collect(), + services, + }; + self.inner.services.ensure_storage_service(request).await?; + } Ok(()) } diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index ce7d3cbdc0a..65b45f75628 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -5,7 +5,6 @@ //! Management of sled-local storage.
use crate::nexus::LazyNexusClient; -use crate::params::DatasetKind; use crate::storage::dataset::DatasetName; use camino::Utf8PathBuf; use futures::stream::FuturesOrdered; @@ -154,8 +153,7 @@ type NotifyFut = #[derive(Debug)] struct NewFilesystemRequest { dataset_id: Uuid, - zpool_id: Uuid, - dataset_kind: DatasetKind, + dataset_name: DatasetName, responder: oneshot::Sender>, } @@ -688,14 +686,18 @@ impl StorageWorker { ) -> Result { info!(self.log, "add_dataset: {:?}", request); let mut pools = resources.pools.lock().await; - let pool = pools.get_mut(&request.zpool_id).ok_or_else(|| { - Error::ZpoolNotFound(format!( - "{}, looked up while trying to add dataset", - request.zpool_id - )) - })?; - let dataset_name = - DatasetName::new(pool.name.clone(), request.dataset_kind.clone()); + let pool = pools + .get_mut(&request.dataset_name.pool().id()) + .ok_or_else(|| { + Error::ZpoolNotFound(format!( + "{}, looked up while trying to add dataset", + request.dataset_name.pool(), + )) + })?; + let dataset_name = DatasetName::new( + pool.name.clone(), + request.dataset_name.dataset().clone(), + ); self.ensure_dataset(request.dataset_id, &dataset_name)?; Ok(dataset_name) } @@ -917,16 +919,11 @@ impl StorageManager { pub async fn upsert_filesystem( &self, dataset_id: Uuid, - zpool_id: Uuid, - dataset_kind: DatasetKind, + dataset_name: DatasetName, ) -> Result { let (tx, rx) = oneshot::channel(); - let request = NewFilesystemRequest { - dataset_id, - zpool_id, - dataset_kind, - responder: tx, - }; + let request = + NewFilesystemRequest { dataset_id, dataset_name, responder: tx }; self.inner .tx From ad7fe487576bd3b0d9e36aa958305f0cbe4e8149 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 5 May 2023 11:29:33 -0400 Subject: [PATCH 2/2] clippy --- sled-agent/src/rack_setup/service.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 98bd0d4f1fc..52adba5d07e 100644 --- 
a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -382,7 +382,7 @@ impl ServiceInner { .iter() .filter_map(|dataset| { match dataset.dataset_name.dataset() { - DatasetKind::InternalDns { http_address, .. } => Some(http_address.clone()), + DatasetKind::InternalDns { http_address, .. } => Some(*http_address), _ => None, } })