Avoid a storage RTT when loading ChannelMonitors without updates

TheBlueMatt · TheBlueMatt · commit 786750c8d074 · 2025-11-10T15:54:41.000Z
When reading `ChannelMonitor`s from a `MonitorUpdatingPersister` on
startup, we have to make sure to load any `ChannelMonitorUpdate`s
and re-apply them as well. For users of async persistence who don't
have any `ChannelMonitorUpdate`s (e.g. because they set
`maximum_pending_updates` to 0 or, in the future, we avoid
persisting updates for small `ChannelMonitor`s), this means two
round-trips to the storage backend, one to load the
`ChannelMonitor` and one to try to read the next
`ChannelMonitorUpdate` only to have it fail.

Instead, here, we use `KVStore::list` to fetch the list of stored
`ChannelMonitorUpdate`s, which for async `KVStore` users allows us
to fetch the list of updates in parallel with loading the
`ChannelMonitor` itself. Then we know exactly when to stop
reading `ChannelMonitorUpdate`s, including reading none if there
are none to read. This also avoids relying on `KVStore::read`
correctly returning `NotFound` in order to discover when to stop
reading `ChannelMonitorUpdate`s.
diff --git a/lightning/src/util/async_poll.rs b/lightning/src/util/async_poll.rs
@@ -20,6 +20,69 @@ pub(crate) enum ResultFuture<F: Future<Output = O> + Unpin, O> {
 	Ready(O),
 }
 
+pub(crate) struct TwoFutureJoiner<AO, BO, AF: Future<Output = AO> + Unpin, BF: Future<Output = BO> + Unpin> {
+	a: Option<ResultFuture<AF, AO>>,
+	b: Option<ResultFuture<BF, BO>>,
+}
+
+impl<AO, BO, AF: Future<Output = AO> + Unpin, BF: Future<Output = BO> + Unpin> TwoFutureJoiner<AO, BO, AF, BF> {
+	pub fn new(future_a: AF, future_b: BF) -> Self {
+		Self {
+			a: Some(ResultFuture::Pending(future_a)),
+			b: Some(ResultFuture::Pending(future_b)),
+		}
+	}
+}
+
+impl<AO, BO, AF: Future<Output = AO> + Unpin, BF: Future<Output = BO> + Unpin> Future for TwoFutureJoiner<AO, BO, AF, BF> {
+	type Output = (AO, BO);
+	fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<(AO, BO)> {
+		let mut have_pending_futures = false;
+		// SAFETY: While we are pinned, we can't get direct access to our internal state because we
+		// aren't `Unpin`. However, we don't actually need the `Pin` - we only use it below on the
+		// `Future` in the `ResultFuture::Pending` case, and the `Future` is bound by `Unpin`.
+		// Thus, the `Pin` is not actually used, and it's safe to bypass it and access the inner
+		// reference directly.
+		let state = unsafe { &mut self.get_unchecked_mut() };
+		macro_rules! poll_future {
+			($future: ident) => {
+				match state.$future {
+					Some(ResultFuture::Pending(ref mut fut)) => match Pin::new(fut).poll(cx) {
+						Poll::Ready(res) => {
+							state.$future = Some(ResultFuture::Ready(res));
+						},
+						Poll::Pending => {
+							have_pending_futures = true;
+						},
+					},
+					Some(ResultFuture::Ready(_)) => {},
+					None => {
+						debug_assert!(false, "Future polled after Ready");
+						return Poll::Pending;
+					},
+				}
+			};
+		}
+		poll_future!(a);
+		poll_future!(b);
+
+		if have_pending_futures {
+			Poll::Pending
+		} else {
+			Poll::Ready((
+				match state.a.take() {
+					Some(ResultFuture::Ready(a)) => a,
+					_ => unreachable!(),
+				},
+				match state.b.take() {
+					Some(ResultFuture::Ready(b)) => b,
+					_ => unreachable!(),
+				}
+			))
+		}
+	}
+}
+
 pub(crate) struct MultiResultFuturePoller<F: Future<Output = O> + Unpin, O> {
 	futures_state: Vec<ResultFuture<F, O>>,
 }
diff --git a/lightning/src/util/persist.rs b/lightning/src/util/persist.rs
@@ -36,7 +36,7 @@ use crate::ln::types::ChannelId;
 use crate::sign::{ecdsa::EcdsaChannelSigner, EntropySource, SignerProvider};
 use crate::sync::Mutex;
 use crate::util::async_poll::{
-	dummy_waker, MaybeSend, MaybeSync, MultiResultFuturePoller, ResultFuture,
+	dummy_waker, MaybeSend, MaybeSync, MultiResultFuturePoller, ResultFuture, TwoFutureJoiner,
 };
 use crate::util::logger::Logger;
 use crate::util::native_async::FutureSpawner;
@@ -576,15 +576,6 @@ fn poll_sync_future<F: Future>(future: F) -> F::Output {
 /// list channel monitors themselves and load channels individually using
 /// [`MonitorUpdatingPersister::read_channel_monitor_with_updates`].
 ///
-/// ## EXTREMELY IMPORTANT
-///
-/// It is extremely important that your [`KVStoreSync::read`] implementation uses the
-/// [`io::ErrorKind::NotFound`] variant correctly: that is, when a file is not found, and _only_ in
-/// that circumstance (not when there is really a permissions error, for example). This is because
-/// neither channel monitor reading function lists updates. Instead, either reads the monitor, and
-/// using its stored `update_id`, synthesizes update storage keys, and tries them in sequence until
-/// one is not found. All _other_ errors will be bubbled up in the function's [`Result`].
-///
 /// # Pruning stale channel updates
 ///
 /// Stale updates are pruned when the consolidation threshold is reached according to `maximum_pending_updates`.
@@ -658,10 +649,6 @@ where
 	}
 
 	/// Reads all stored channel monitors, along with any stored updates for them.
-	///
-	/// It is extremely important that your [`KVStoreSync::read`] implementation uses the
-	/// [`io::ErrorKind::NotFound`] variant correctly. For more information, please see the
-	/// documentation for [`MonitorUpdatingPersister`].
 	pub fn read_all_channel_monitors_with_updates(
 		&self,
 	) -> Result<
@@ -673,10 +660,6 @@ where
 
 	/// Read a single channel monitor, along with any stored updates for it.
 	///
-	/// It is extremely important that your [`KVStoreSync::read`] implementation uses the
-	/// [`io::ErrorKind::NotFound`] variant correctly. For more information, please see the
-	/// documentation for [`MonitorUpdatingPersister`].
-	///
 	/// For `monitor_key`, channel storage keys can be the channel's funding [`OutPoint`], with an
 	/// underscore `_` between txid and index for v1 channels. For example, given:
 	///
@@ -873,10 +856,6 @@ where
 	/// While the reads themselves are performend in parallel, deserializing the
 	/// [`ChannelMonitor`]s is not. For large [`ChannelMonitor`]s actively used for forwarding,
 	/// this may substantially limit the parallelism of this method.
-	///
-	/// It is extremely important that your [`KVStore::read`] implementation uses the
-	/// [`io::ErrorKind::NotFound`] variant correctly. For more information, please see the
-	/// documentation for [`MonitorUpdatingPersister`].
 	pub async fn read_all_channel_monitors_with_updates(
 		&self,
 	) -> Result<
@@ -911,10 +890,6 @@ where
 	/// Because [`FutureSpawner`] requires that the spawned future be `'static` (matching `tokio`
 	/// and other multi-threaded runtime requirements), this method requires that `self` be an
 	/// `Arc` that can live for `'static` and be sent and accessed across threads.
-	///
-	/// It is extremely important that your [`KVStore::read`] implementation uses the
-	/// [`io::ErrorKind::NotFound`] variant correctly. For more information, please see the
-	/// documentation for [`MonitorUpdatingPersister`].
 	pub async fn read_all_channel_monitors_with_updates_parallel(
 		self: &Arc<Self>,
 	) -> Result<
@@ -954,10 +929,6 @@ where
 
 	/// Read a single channel monitor, along with any stored updates for it.
 	///
-	/// It is extremely important that your [`KVStoreSync::read`] implementation uses the
-	/// [`io::ErrorKind::NotFound`] variant correctly. For more information, please see the
-	/// documentation for [`MonitorUpdatingPersister`].
-	///
 	/// For `monitor_key`, channel storage keys can be the channel's funding [`OutPoint`], with an
 	/// underscore `_` between txid and index for v1 channels. For example, given:
 	///
@@ -1116,40 +1087,37 @@ where
 		io::Error,
 	> {
 		let monitor_name = MonitorName::from_str(monitor_key)?;
-		let read_res = self.maybe_read_monitor(&monitor_name, monitor_key).await?;
-		let (block_hash, monitor) = match read_res {
+		// TODO: After an MSRV bump we should be able to use the pin macro rather than Box::pin
+		let read_future = Box::pin(self.maybe_read_monitor(&monitor_name, monitor_key));
+		let list_future =
+			Box::pin(self.kv_store.list(CHANNEL_MONITOR_UPDATE_PERSISTENCE_PRIMARY_NAMESPACE, monitor_key));
+		let (read_res, list_res) = TwoFutureJoiner::new(read_future, list_future).await;
+		let (block_hash, monitor) = match read_res? {
 			Some(res) => res,
 			None => return Ok(None),
 		};
 		let mut current_update_id = monitor.get_latest_update_id();
-		// TODO: Parallelize this loop by speculatively reading a batch of updates
-		loop {
-			current_update_id = match current_update_id.checked_add(1) {
-				Some(next_update_id) => next_update_id,
-				None => break,
-			};
-			let update_name = UpdateName::from(current_update_id);
-			let update = match self.read_monitor_update(monitor_key, &update_name).await {
-				Ok(update) => update,
-				Err(err) if err.kind() == io::ErrorKind::NotFound => {
-					// We can't find any more updates, so we are done.
-					break;
-				},
-				Err(err) => return Err(err),
-			};
-
-			monitor
-				.update_monitor(&update, &self.broadcaster, &self.fee_estimator, &self.logger)
-				.map_err(|e| {
-				log_error!(
-					self.logger,
-					"Monitor update failed. monitor: {} update: {} reason: {:?}",
-					monitor_key,
-					update_name.as_str(),
-					e
-				);
-				io::Error::new(io::ErrorKind::Other, "Monitor update failed")
-			})?;
+		let updates: Result<Vec<_>, _> =
+			list_res?.into_iter().map(|name| UpdateName::new(name)).collect();
+		let mut updates = updates?;
+		updates.sort_unstable();
+		// TODO: Parallelize this loop
+		for update_name in updates {
+			if update_name.0 > current_update_id {
+				let update = self.read_monitor_update(monitor_key, &update_name).await?;
+				monitor
+					.update_monitor(&update, &self.broadcaster, &self.fee_estimator, &self.logger)
+					.map_err(|e| {
+					log_error!(
+						self.logger,
+						"Monitor update failed. monitor: {} update: {} reason: {:?}",
+						monitor_key,
+						update_name.as_str(),
+						e
+					);
+					io::Error::new(io::ErrorKind::Other, "Monitor update failed")
+				})?;
+			}
 		}
 		Ok(Some((block_hash, monitor)))
 	}
@@ -1524,7 +1492,7 @@ impl core::fmt::Display for MonitorName {
 /// let monitor_name = "some_monitor_name";
 /// let storage_key = format!("channel_monitor_updates/{}/{}", monitor_name, update_name.as_str());
 /// ```
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub struct UpdateName(pub u64, String);
 
 impl UpdateName {