From fa1338179729ace1e8bb56983e5f033479f48d9c Mon Sep 17 00:00:00 2001
From: Wilmer Paulino
Date: Thu, 30 Oct 2025 15:58:09 -0700
Subject: [PATCH] Support async fetching of commitment point during channel
 reestablish

`HolderCommitmentPoint` currently tracks the current and next point used on
counterparty commitments, which are unrevoked. When we reestablish a channel,
the counterparty sends us the commitment height, along with the corresponding
secret, for the state they believe to be the latest. We compare said secret to
the derived point we fetch from the signer to know if the peer is being
honest.

Since the protocol does not allow peers (assuming no data loss) to be behind
the current state by more than one update, we can cache the two latest revoked
commitment points alongside `HolderCommitmentPoint`, such that we no longer
need to reach the signer asynchronously when handling `channel_reestablish`
messages along the happy path. By doing so, we avoid the complexity of pausing
the state machine (which could also require stashing any update messages from
the counterparty) while the signer response is pending.

The only remaining case to handle is when the counterparty presents a
`channel_reestablish` with a state later than what we know. This can only
result in two terminal cases: either they provided a valid commitment secret
proving we are behind and we need to panic, or they lied and we force close
the channel. This is the only case we choose to handle asynchronously, as it's
relatively trivial to do.
---
 lightning/src/ln/async_signer_tests.rs | 104 ++++++++++++++++-
 lightning/src/ln/channel.rs            | 150 ++++++++++++++++++++----
 lightning/src/ln/channelmanager.rs     |  22 ++--
 3 files changed, 239 insertions(+), 37 deletions(-)

diff --git a/lightning/src/ln/async_signer_tests.rs b/lightning/src/ln/async_signer_tests.rs
index 71821081094..41d024b6a1b 100644
--- a/lightning/src/ln/async_signer_tests.rs
+++ b/lightning/src/ln/async_signer_tests.rs
@@ -11,6 +11,7 @@
 //! properly with a signer implementation that asynchronously derives signatures.
 
 use crate::prelude::*;
+use crate::util::ser::Writeable;
 use bitcoin::secp256k1::Secp256k1;
 
 use crate::chain::channelmonitor::LATENCY_GRACE_PERIOD_BLOCKS;
@@ -20,7 +21,7 @@ use crate::ln::chan_utils::ClosingTransaction;
 use crate::ln::channel::DISCONNECT_PEER_AWAITING_RESPONSE_TICKS;
 use crate::ln::channel_state::{ChannelDetails, ChannelShutdownState};
 use crate::ln::channelmanager::{PaymentId, RAACommitmentOrder, RecipientOnionFields};
-use crate::ln::msgs::{BaseMessageHandler, ChannelMessageHandler, MessageSendEvent};
+use crate::ln::msgs::{BaseMessageHandler, ChannelMessageHandler, ErrorAction, MessageSendEvent};
 use crate::ln::{functional_test_utils::*, msgs};
 use crate::sign::ecdsa::EcdsaChannelSigner;
 use crate::sign::SignerProvider;
@@ -1442,3 +1443,104 @@ fn test_no_disconnect_while_async_commitment_signed_expecting_remote_revoke_and_
 	assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
 	assert!(nodes[1].node.get_and_clear_pending_msg_events().into_iter().any(has_disconnect_event));
 }
+
+#[test]
+fn test_async_panic_on_stale_state() {
+	// Test that we panic if the counterparty sends us a `channel_reestablish` message with a
+	// `next_remote_commitment_number` greater than what we know with a valid corresponding secret,
+	// proving that we have lost state, when we have an async signer that is not able to immediately
+	// fetch the corresponding point to verify.
+ let chanmon_cfgs = create_chanmon_cfgs(2); + let node_cfgs = create_node_cfgs(2, &chanmon_cfgs); + let stale_persister; + let stale_chain_monitor; + let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); + let stale_node; + let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs); + + let node_id_0 = nodes[0].node.get_our_node_id(); + let node_id_1 = nodes[1].node.get_our_node_id(); + let chan_id = create_announced_chan_between_nodes(&nodes, 0, 1).2; + + let encoded_stale_node_1 = nodes[1].node.encode(); + let encoded_stale_monitor_1 = get_monitor!(nodes[1], chan_id).encode(); + + send_payment(&nodes[0], &[&nodes[1]], 1_000_000); + + nodes[0].node.peer_disconnected(node_id_1); + nodes[1].node.peer_disconnected(node_id_0); + + reload_node!( + nodes[1], + encoded_stale_node_1, + &[&encoded_stale_monitor_1], + stale_persister, + stale_chain_monitor, + stale_node + ); + + nodes[1].disable_channel_signer_op(&node_id_0, &chan_id, SignerOp::GetPerCommitmentPoint); + + connect_nodes(&nodes[0], &nodes[1]); + let reestablish_0_to_1 = get_chan_reestablish_msgs!(nodes[0], nodes[1]); + nodes[1].node.handle_channel_reestablish(node_id_0, &reestablish_0_to_1[0]); + + nodes[1].enable_channel_signer_op(&node_id_0, &chan_id, SignerOp::GetPerCommitmentPoint); + std::panic::catch_unwind(|| nodes[1].node.signer_unblocked(None)).unwrap_err(); + nodes[1].logger.assert_log_contains( + "lightning::ln::channel", + "We have fallen behind - we have received proof that if we broadcast our counterparty is going to claim all our funds.", + 1, + ); + + std::panic::catch_unwind(|| drop(nodes)).unwrap_err(); +} + +#[test] +fn test_async_force_close_on_invalid_secret_for_stale_state() { + // Test that we force close a channel if the counterparty sends us a `channel_reestablish` + // message with a `next_remote_commitment_number` greater than what we know with an invalid + // corresponding secret when we have an async signer that is not able to immediately fetch the + // corresponding point to verify. + let chanmon_cfgs = create_chanmon_cfgs(2); + let node_cfgs = create_node_cfgs(2, &chanmon_cfgs); + let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); + let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs); + + let node_id_0 = nodes[0].node.get_our_node_id(); + let node_id_1 = nodes[1].node.get_our_node_id(); + let chan_id = create_announced_chan_between_nodes(&nodes, 0, 1).2; + + send_payment(&nodes[0], &[&nodes[1]], 1_000_000); + + nodes[0].node.peer_disconnected(node_id_1); + nodes[1].node.peer_disconnected(node_id_0); + + nodes[1].disable_channel_signer_op(&node_id_0, &chan_id, SignerOp::GetPerCommitmentPoint); + + connect_nodes(&nodes[0], &nodes[1]); + let mut reestablish_0_to_1 = get_chan_reestablish_msgs!(nodes[0], nodes[1]); + let _ = get_chan_reestablish_msgs!(nodes[1], nodes[0]); + reestablish_0_to_1[0].next_remote_commitment_number += 1; + nodes[1].node.handle_channel_reestablish(node_id_0, &reestablish_0_to_1[0]); + + assert!(nodes[1].node.get_and_clear_pending_events().is_empty()); + let msg_events = nodes[1].node.get_and_clear_pending_msg_events(); + match &msg_events[0] { + MessageSendEvent::HandleError { + action: ErrorAction::DisconnectPeerWithWarning { .. }, + .. 
+		} => {},
+		_ => panic!("Unexpected event"),
+	}
+
+	nodes[1].enable_channel_signer_op(&node_id_0, &chan_id, SignerOp::GetPerCommitmentPoint);
+	nodes[1].node.signer_unblocked(None);
+
+	let closure_reason = ClosureReason::ProcessingError {
+		err: "Peer sent a channel_reestablish indicating we're stale with an invalid commitment secret".to_owned(),
+	};
+	check_added_monitors(&nodes[1], 1);
+	check_closed_broadcast(&nodes[1], 1, true);
+	check_closed_event(&nodes[1], 1, closure_reason, false, &[node_id_0], 100_000);
+}
diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs
index 75905dba1cd..731f2d0812f 100644
--- a/lightning/src/ln/channel.rs
+++ b/lightning/src/ln/channel.rs
@@ -1217,6 +1217,8 @@ pub(crate) struct DisconnectResult {
 #[derive(Debug, Copy, Clone)]
 struct HolderCommitmentPoint {
 	next_transaction_number: u64,
+	previous_revoked_point: Option<PublicKey>,
+	last_revoked_point: Option<PublicKey>,
 	current_point: Option<PublicKey>,
 	next_point: PublicKey,
 	pending_next_point: Option<PublicKey>,
@@ -1229,6 +1231,8 @@ impl HolderCommitmentPoint {
 	{
 		Some(HolderCommitmentPoint {
 			next_transaction_number: INITIAL_COMMITMENT_NUMBER,
+			previous_revoked_point: None,
+			last_revoked_point: None,
 			current_point: None,
 			next_point: signer.as_ref().get_per_commitment_point(INITIAL_COMMITMENT_NUMBER, secp_ctx).ok()?,
 			pending_next_point: signer.as_ref().get_per_commitment_point(INITIAL_COMMITMENT_NUMBER - 1, secp_ctx).ok(),
@@ -1239,6 +1243,14 @@ impl HolderCommitmentPoint {
 		self.pending_next_point.is_some()
 	}
 
+	pub fn previous_revoked_point(&self) -> Option<PublicKey> {
+		self.previous_revoked_point
+	}
+
+	pub fn last_revoked_point(&self) -> Option<PublicKey> {
+		self.last_revoked_point
+	}
+
 	pub fn current_transaction_number(&self) -> u64 {
 		self.next_transaction_number + 1
 	}
@@ -1304,6 +1316,8 @@ impl HolderCommitmentPoint {
 		if let Some(next_point) = self.pending_next_point {
 			*self = Self {
 				next_transaction_number: self.next_transaction_number - 1,
+				previous_revoked_point: self.last_revoked_point,
+				last_revoked_point: self.current_point,
 				current_point: Some(self.next_point),
 				next_point,
 				pending_next_point: None,
@@ -1582,13 +1596,13 @@ where
 	#[rustfmt::skip]
 	pub fn signer_maybe_unblocked<L: Deref, CBP>(
 		&mut self, chain_hash: ChainHash, logger: &L, path_for_release_htlc: CBP
-	) -> Option<SignerResumeUpdates> where L::Target: Logger, CBP: Fn(u64) -> BlindedMessagePath {
+	) -> Result<Option<SignerResumeUpdates>, ChannelError> where L::Target: Logger, CBP: Fn(u64) -> BlindedMessagePath {
 		match &mut self.phase {
 			ChannelPhase::Undefined => unreachable!(),
-			ChannelPhase::Funded(chan) => Some(chan.signer_maybe_unblocked(logger, path_for_release_htlc)),
+			ChannelPhase::Funded(chan) => chan.signer_maybe_unblocked(logger, path_for_release_htlc).map(|r| Some(r)),
 			ChannelPhase::UnfundedOutboundV1(chan) => {
 				let (open_channel, funding_created) = chan.signer_maybe_unblocked(chain_hash, logger);
-				Some(SignerResumeUpdates {
+				Ok(Some(SignerResumeUpdates {
 					commitment_update: None,
 					revoke_and_ack: None,
 					open_channel,
@@ -1600,11 +1614,11 @@ where
 					closing_signed: None,
 					signed_closing_tx: None,
 					shutdown_result: None,
-				})
+				}))
 			},
 			ChannelPhase::UnfundedInboundV1(chan) => {
 				let accept_channel = chan.signer_maybe_unblocked(logger);
-				Some(SignerResumeUpdates {
+				Ok(Some(SignerResumeUpdates {
 					commitment_update: None,
 					revoke_and_ack: None,
 					open_channel: None,
@@ -1616,9 +1630,9 @@ where
 					closing_signed: None,
 					signed_closing_tx: None,
 					shutdown_result: None,
-				})
+				}))
 			},
-			ChannelPhase::UnfundedV2(_) => None,
+			ChannelPhase::UnfundedV2(_) => Ok(None),
 		}
 	}
 
@@ -2902,6 +2916,7 @@ where
 	/// Similar to [`Self::signer_pending_commitment_update`] but we're waiting to send a
 	/// [`msgs::ChannelReady`].
 	signer_pending_channel_ready: bool,
+	signer_pending_stale_state_verification: Option<(u64, SecretKey)>,
 
 	// pending_update_fee is filled when sending and receiving update_fee.
 	//
@@ -3604,6 +3619,7 @@ where
 			signer_pending_funding: false,
 			signer_pending_closing: false,
 			signer_pending_channel_ready: false,
+			signer_pending_stale_state_verification: None,
 
 			last_sent_closing_fee: None,
 			last_received_closing_sig: None,
@@ -3842,6 +3858,7 @@ where
 			signer_pending_funding: false,
 			signer_pending_closing: false,
 			signer_pending_channel_ready: false,
+			signer_pending_stale_state_verification: None,
 
 			last_sent_closing_fee: None,
 			last_received_closing_sig: None,
@@ -9455,7 +9472,18 @@ where
 	#[rustfmt::skip]
 	pub fn signer_maybe_unblocked<L: Deref, CBP>(
 		&mut self, logger: &L, path_for_release_htlc: CBP
-	) -> SignerResumeUpdates where L::Target: Logger, CBP: Fn(u64) -> BlindedMessagePath {
+	) -> Result<SignerResumeUpdates, ChannelError> where L::Target: Logger, CBP: Fn(u64) -> BlindedMessagePath {
+		if let Some((commitment_number, commitment_secret)) = self.context.signer_pending_stale_state_verification.clone() {
+			if let Ok(expected_point) = self.context.holder_signer.as_ref()
+				.get_per_commitment_point(commitment_number, &self.context.secp_ctx)
+			{
+				self.context.signer_pending_stale_state_verification.take();
+				if expected_point != PublicKey::from_secret_key(&self.context.secp_ctx, &commitment_secret) {
+					return Err(ChannelError::close("Peer sent a channel_reestablish indicating we're stale with an invalid commitment secret".to_owned()));
+				}
+				Self::panic_on_stale_state(logger);
+			}
+		}
 		if !self.holder_commitment_point.can_advance() {
 			log_trace!(logger, "Attempting to update holder per-commitment point...");
 			self.holder_commitment_point.try_resolve_pending(&self.context.holder_signer, &self.context.secp_ctx, logger);
@@ -9539,7 +9567,7 @@ where
 			if signed_closing_tx.is_some() { "a" } else { "no" },
 			if shutdown_result.is_some() { "a" } else { "no" });
 
-		SignerResumeUpdates {
+		Ok(SignerResumeUpdates {
 			commitment_update,
 			revoke_and_ack,
 			open_channel: None,
@@ -9551,7 +9579,7 @@ where
 			closing_signed,
 			signed_closing_tx,
 			shutdown_result,
-		}
+		})
 	}
 
 	fn get_last_revoke_and_ack(
@@ -9732,6 +9760,25 @@ where
 		}
 	}
 
+	fn panic_on_stale_state<L: Deref>(logger: &L)
+	where
+		L::Target: Logger,
+	{
+		macro_rules! log_and_panic {
+			($err_msg: expr) => {
+				log_error!(logger, $err_msg);
+				panic!($err_msg);
+			};
+		}
+		log_and_panic!("We have fallen behind - we have received proof that if we broadcast our counterparty is going to claim all our funds.\n\
+			This implies you have restarted with lost ChannelMonitor and ChannelManager state, the first of which is a violation of the LDK chain::Watch requirements.\n\
+			More specifically, this means you have a bug in your implementation that can cause loss of funds, or you are running with an old backup, which is unsafe.\n\
+			If you have restored from an old backup and wish to claim any available funds, you should restart with\n\
+			an empty ChannelManager and no ChannelMonitors, reconnect to peer(s), ensure they've force-closed all of your\n\
+			previous channels and that the closure transaction(s) have confirmed on-chain,\n\
+			then restart with an empty ChannelManager and the latest ChannelMonitors that you do have.");
+	}
+
 	/// May panic if some calls other than message-handling calls (which will all Err immediately)
 	/// have been called between remove_uncommitted_htlcs_and_mark_paused and this call.
#[rustfmt::skip] @@ -9765,28 +9812,34 @@ where let our_commitment_transaction = INITIAL_COMMITMENT_NUMBER - self.holder_commitment_point.current_transaction_number(); if msg.next_remote_commitment_number > 0 { - let expected_point = self.context.holder_signer.as_ref() - .get_per_commitment_point(INITIAL_COMMITMENT_NUMBER - msg.next_remote_commitment_number + 1, &self.context.secp_ctx) - .expect("TODO: async signing is not yet supported for per commitment points upon channel reestablishment"); let given_secret = SecretKey::from_slice(&msg.your_last_per_commitment_secret) .map_err(|_| ChannelError::close("Peer sent a garbage channel_reestablish with unparseable secret key".to_owned()))?; - if expected_point != PublicKey::from_secret_key(&self.context.secp_ctx, &given_secret) { - return Err(ChannelError::close("Peer sent a garbage channel_reestablish with secret key not matching the commitment height provided".to_owned())); - } if msg.next_remote_commitment_number > our_commitment_transaction { - macro_rules! log_and_panic { - ($err_msg: expr) => { - log_error!(logger, $err_msg); - panic!($err_msg); - } + let commitment_number = INITIAL_COMMITMENT_NUMBER - msg.next_remote_commitment_number + 1; + let expected_point = self.context.holder_signer.as_ref() + .get_per_commitment_point(commitment_number, &self.context.secp_ctx) + .ok(); + if expected_point.is_none() { + self.context.signer_pending_stale_state_verification = Some((commitment_number, given_secret)); + log_info!(logger, "Waiting on async signer to verify stale state proof"); + return Err(ChannelError::WarnAndDisconnect("Channel is not ready to be reestablished yet".to_owned())); + } + if expected_point != Some(PublicKey::from_secret_key(&self.context.secp_ctx, &given_secret)) { + return Err(ChannelError::close("Peer sent a channel_reestablish indicating we're stale with an invalid commitment secret".to_owned())); + } + Self::panic_on_stale_state(logger); + } else if msg.next_remote_commitment_number == our_commitment_transaction { + let expected_point = self.holder_commitment_point.last_revoked_point() + .expect("The last revoked commitment point must exist when the state has advanced"); + if expected_point != PublicKey::from_secret_key(&self.context.secp_ctx, &given_secret) { + return Err(ChannelError::close("Peer sent a garbage channel_reestablish with secret key not matching the commitment height provided".to_owned())); + } + } else if msg.next_remote_commitment_number + 1 == our_commitment_transaction { + let expected_point = self.holder_commitment_point.previous_revoked_point() + .expect("The previous revoked commitment point must exist when they are one state behind"); + if expected_point != PublicKey::from_secret_key(&self.context.secp_ctx, &given_secret) { + return Err(ChannelError::close("Peer sent a garbage channel_reestablish with secret key not matching the commitment height provided".to_owned())); } - log_and_panic!("We have fallen behind - we have received proof that if we broadcast our counterparty is going to claim all our funds.\n\ - This implies you have restarted with lost ChannelMonitor and ChannelManager state, the first of which is a violation of the LDK chain::Watch requirements.\n\ - More specifically, this means you have a bug in your implementation that can cause loss of funds, or you are running with an old backup, which is unsafe.\n\ - If you have restored from an old backup and wish to claim any available funds, you should restart with\n\ - an empty ChannelManager and no ChannelMonitors, reconnect to 
peer(s), ensure they've force-closed all of your\n\
-					previous channels and that the closure transaction(s) have confirmed on-chain,\n\
-					then restart with an empty ChannelManager and the latest ChannelMonitors that you do have.");
 			}
 		}
 
@@ -14728,6 +14781,10 @@ where
 		}
 		let is_manual_broadcast = Some(self.context.is_manual_broadcast);
 
+		let holder_commitment_point_previous_revoked =
+			self.holder_commitment_point.previous_revoked_point();
+		let holder_commitment_point_last_revoked =
+			self.holder_commitment_point.last_revoked_point();
 		let holder_commitment_point_current = self.holder_commitment_point.current_point();
 		let holder_commitment_point_next = self.holder_commitment_point.next_point();
 		let holder_commitment_point_pending_next = self.holder_commitment_point.pending_next_point;
@@ -14789,6 +14846,8 @@ where
 			(65, self.quiescent_action, option), // Added in 0.2
 			(67, pending_outbound_held_htlc_flags, optional_vec), // Added in 0.2
 			(69, holding_cell_held_htlc_flags, optional_vec), // Added in 0.2
+			(71, holder_commitment_point_previous_revoked, option), // Added in 0.3
+			(73, holder_commitment_point_last_revoked, option), // Added in 0.3
 		});
 
 		Ok(())
@@ -15140,6 +15199,8 @@ where
 		let mut malformed_htlcs: Option<Vec<(u64, u16, [u8; 32])>> = None;
 		let mut monitor_pending_update_adds: Option<Vec<msgs::UpdateAddHTLC>> = None;
 
+		let mut holder_commitment_point_previous_revoked_opt: Option<PublicKey> = None;
+		let mut holder_commitment_point_last_revoked_opt: Option<PublicKey> = None;
 		let mut holder_commitment_point_current_opt: Option<PublicKey> = None;
 		let mut holder_commitment_point_next_opt: Option<PublicKey> = None;
 		let mut holder_commitment_point_pending_next_opt: Option<PublicKey> = None;
@@ -15203,6 +15264,8 @@ where
 			(65, quiescent_action, upgradable_option), // Added in 0.2
 			(67, pending_outbound_held_htlc_flags_opt, optional_vec), // Added in 0.2
 			(69, holding_cell_held_htlc_flags_opt, optional_vec), // Added in 0.2
+			(71, holder_commitment_point_previous_revoked_opt, option), // Added in 0.3
+			(73, holder_commitment_point_last_revoked_opt, option), // Added in 0.3
 		});
 
 		let holder_signer = signer_provider.derive_channel_signer(channel_keys_id);
@@ -15418,9 +15481,37 @@ where
 			}
 		});
 
+		let previous_revoked_point =
+			holder_commitment_point_previous_revoked_opt.or_else(|| {
+				if holder_commitment_next_transaction_number > INITIAL_COMMITMENT_NUMBER - 3 {
+					None
+				} else {
+					Some(holder_signer
+						.get_per_commitment_point(
+							holder_commitment_next_transaction_number + 3,
+							&secp_ctx,
+						)
+						.expect("Must be able to derive the previous revoked commitment point upon channel restoration"))
+				}
+			});
+		let last_revoked_point = holder_commitment_point_last_revoked_opt.or_else(|| {
+			if holder_commitment_next_transaction_number > INITIAL_COMMITMENT_NUMBER - 2 {
+				None
+			} else {
+				Some(holder_signer
+					.get_per_commitment_point(
+						holder_commitment_next_transaction_number + 2,
+						&secp_ctx,
+					)
+					.expect("Must be able to derive the last revoked commitment point upon channel restoration"))
+			}
+		});
+
 		match (holder_commitment_point_next_opt, holder_commitment_point_pending_next_opt) {
 			(Some(next_point), pending_next_point) => HolderCommitmentPoint {
 				next_transaction_number: holder_commitment_next_transaction_number,
+				previous_revoked_point,
+				last_revoked_point,
 				current_point,
 				next_point,
 				pending_next_point,
@@ -15441,6 +15532,8 @@ where
 			);
 			HolderCommitmentPoint {
 				next_transaction_number: holder_commitment_next_transaction_number,
+				previous_revoked_point,
+				last_revoked_point,
 				current_point,
 				next_point,
 				pending_next_point: Some(pending_next_point),
@@ -15527,6 +15620,7 @@ where
 			signer_pending_funding: false,
 			signer_pending_closing: false,
 			signer_pending_channel_ready: false,
+			signer_pending_stale_state_verification: None,
 
 			pending_update_fee,
 			holding_cell_update_fee,
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index 644920557d2..8485831ebdf 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -12197,15 +12197,14 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
 		let _persistence_guard = PersistenceNotifierGuard::notify_on_drop(self);
 
 		// Returns whether we should remove this channel as it's just been closed.
-		let unblock_chan = |chan: &mut Channel<SP>, pending_msg_events: &mut Vec<MessageSendEvent>| -> Option<ShutdownResult> {
+		let unblock_chan = |chan: &mut Channel<SP>, pending_msg_events: &mut Vec<MessageSendEvent>| -> Result<Option<ShutdownResult>, ChannelError> {
 			let channel_id = chan.context().channel_id();
 			let outbound_scid_alias = chan.context().outbound_scid_alias();
 			let logger = WithChannelContext::from(&self.logger, &chan.context(), None);
 			let node_id = chan.context().get_counterparty_node_id();
-			if let Some(msgs) = chan.signer_maybe_unblocked(
-				self.chain_hash, &&logger,
-				|htlc_id| self.path_for_release_held_htlc(htlc_id, outbound_scid_alias, &channel_id, &node_id)
-			) {
+			let cbp = |htlc_id| self.path_for_release_held_htlc(htlc_id, outbound_scid_alias, &channel_id, &node_id);
+			let msgs = chan.signer_maybe_unblocked(self.chain_hash, &&logger, cbp)?;
+			if let Some(msgs) = msgs {
 				if chan.context().is_connected() {
 					if let Some(msg) = msgs.open_channel {
 						pending_msg_events.push(MessageSendEvent::SendOpenChannel {
@@ -12274,9 +12273,9 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
 					debug_assert!(msgs.channel_ready.is_none());
 					debug_assert!(msgs.signed_closing_tx.is_none());
 				}
-				msgs.shutdown_result
+				Ok(msgs.shutdown_result)
 			} else {
-				None
+				Ok(None)
 			}
 		};
 
@@ -12293,7 +12292,14 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
 		peer_state.channel_by_id.retain(|_, chan| {
 			let shutdown_result = match channel_opt {
 				Some((_, channel_id)) if chan.context().channel_id() != channel_id => None,
-				_ => unblock_chan(chan, &mut peer_state.pending_msg_events),
+				_ => match unblock_chan(chan, &mut peer_state.pending_msg_events) {
+					Ok(shutdown_result) => shutdown_result,
+					Err(err) => {
+						let (_, err) = convert_channel_err!(self, peer_state, err, chan);
+						shutdown_results.push((Err(err), *cp_id));
+						return false;
+					},
+				},
 			};
 			if let Some(shutdown) = shutdown_result {
 				let context = chan.context();
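
As a rough, standalone sketch of the reestablish verification flow described in the commit message (not part of the patch; the struct, enum, and helper names below are illustrative stand-ins rather than LDK's actual types or API):

// Illustrative sketch only -- types and names are hypothetical simplifications
// of the logic added to channel.rs above.
use bitcoin::secp256k1::{PublicKey, Secp256k1, SecretKey, Signing};

/// Hypothetical mirror of the cached state carried by `HolderCommitmentPoint`.
struct CachedRevokedPoints {
	/// Number of holder commitments the counterparty should know about
	/// (analogous to `our_commitment_transaction` in the patch).
	our_commitment_number: u64,
	/// Point revoked two updates ago, if the state has advanced that far.
	previous_revoked_point: Option<PublicKey>,
	/// Most recently revoked point, if any state has been revoked yet.
	last_revoked_point: Option<PublicKey>,
}

enum ReestablishCheck {
	/// The peer's secret matches a cached revoked point; continue reestablish.
	Ok,
	/// The secret does not match the claimed height; close the channel.
	BadSecret,
	/// The peer claims a commitment number beyond ours; the matching point is
	/// not cached, so verification has to wait for the (async) signer.
	NeedsSigner { claimed_commitment_number: u64 },
}

fn check_peer_claim<C: Signing>(
	secp: &Secp256k1<C>, cache: &CachedRevokedPoints, next_remote_commitment_number: u64,
	given_secret: &SecretKey,
) -> ReestablishCheck {
	// The point the peer's secret actually corresponds to.
	let given_point = PublicKey::from_secret_key(secp, given_secret);
	let expected = if next_remote_commitment_number > cache.our_commitment_number {
		// The peer claims we're stale: nothing in the cache can prove or
		// disprove it, so this is the one case deferred to the signer.
		return ReestablishCheck::NeedsSigner {
			claimed_commitment_number: next_remote_commitment_number,
		};
	} else if next_remote_commitment_number == cache.our_commitment_number {
		cache.last_revoked_point
	} else if next_remote_commitment_number + 1 == cache.our_commitment_number {
		cache.previous_revoked_point
	} else {
		// Being more than one state behind is not allowed by the protocol.
		return ReestablishCheck::BadSecret;
	};
	match expected {
		Some(point) if point == given_point => ReestablishCheck::Ok,
		_ => ReestablishCheck::BadSecret,
	}
}

The happy-path branches compare only against the cached revoked points; the signer is consulted solely when the peer claims a newer state than we know, which corresponds to the patch's `signer_pending_stale_state_verification` handling.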