Allow get_per_commitment_point to fail. #2487

Closed
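Context, not part of the rendered diff: the changes below assume that the channel signer's `get_per_commitment_point` is now fallible. A rough sketch of the assumed shape, inferred from the `.unwrap()` call in `onchaintx.rs` below (the real trait definition is not shown in this excerpt and may differ):

use bitcoin::secp256k1::{All, PublicKey, Secp256k1};

// Sketch under the stated assumption: the per-commitment point comes back as a Result so a
// transiently unavailable (e.g. remote) signer can report failure instead of panicking.
// `FallibleCommitmentPointSource` is a hypothetical stand-in for the LDK signer trait.
pub trait FallibleCommitmentPointSource {
    fn get_per_commitment_point(
        &self, idx: u64, secp_ctx: &Secp256k1<All>,
    ) -> Result<PublicKey, ()>;
}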
4 changes: 1 addition & 3 deletions lightning/src/chain/channelmonitor.rs
@@ -2752,9 +2752,7 @@ impl<Signer: WriteableEcdsaChannelSigner> ChannelMonitorImpl<Signer> {
},
commitment_txid: htlc.commitment_txid,
per_commitment_number: htlc.per_commitment_number,
per_commitment_point: self.onchain_tx_handler.signer.get_per_commitment_point(
htlc.per_commitment_number, &self.onchain_tx_handler.secp_ctx,
),
per_commitment_point: htlc.per_commitment_point,
htlc: htlc.htlc,
preimage: htlc.preimage,
counterparty_sig: htlc.counterparty_sig,
8 changes: 8 additions & 0 deletions lightning/src/chain/onchaintx.rs
@@ -179,6 +179,7 @@ pub(crate) struct ExternalHTLCClaim {
pub(crate) htlc: HTLCOutputInCommitment,
pub(crate) preimage: Option<PaymentPreimage>,
pub(crate) counterparty_sig: Signature,
pub(crate) per_commitment_point: bitcoin::secp256k1::PublicKey,
}

// Represents the different types of claims for which events are yielded externally to satisfy said
@@ -1188,9 +1189,16 @@ impl<ChannelSigner: WriteableEcdsaChannelSigner> OnchainTxHandler<ChannelSigner>
})
.map(|(htlc_idx, htlc)| {
let counterparty_htlc_sig = holder_commitment.counterparty_htlc_sigs[htlc_idx];

// TODO(waterson) fallible: move this somewhere!
let per_commitment_point = self.signer.get_per_commitment_point(
trusted_tx.commitment_number(), &self.secp_ctx,
).unwrap();

ExternalHTLCClaim {
commitment_txid: trusted_tx.txid(),
per_commitment_number: trusted_tx.commitment_number(),
per_commitment_point,
htlc: htlc.clone(),
preimage: *preimage,
counterparty_sig: counterparty_htlc_sig,
218 changes: 183 additions & 35 deletions lightning/src/ln/channel.rs

Large diffs are not rendered by default.
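Since the `channel.rs` diff is not rendered, here is a standalone illustration (a sketch under stated assumptions, not the actual patch) of the pattern the `channelmanager.rs` call sites below rely on: cache the first holder per-commitment point once the signer can produce it, expose whether it is available yet, and allow a later retry.

use bitcoin::secp256k1::PublicKey;

// Standalone sketch only: the method names mirror the call sites below
// (`has_first_holder_per_commitment_point`, `set_first_holder_per_commitment_point`),
// but this struct is not the code added to channel.rs.
struct FirstPointCache {
    first_holder_per_commitment_point: Option<PublicKey>,
}

impl FirstPointCache {
    /// Whether the first per-commitment point has already been obtained from the signer.
    fn has_first_holder_per_commitment_point(&self) -> bool {
        self.first_holder_per_commitment_point.is_some()
    }

    /// Re-queries the signer; returns `Err(())` if the point is still unavailable.
    fn set_first_holder_per_commitment_point(
        &mut self, query_signer: impl Fn() -> Result<PublicKey, ()>,
    ) -> Result<(), ()> {
        self.first_holder_per_commitment_point = Some(query_signer()?);
        Ok(())
    }
}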

190 changes: 172 additions & 18 deletions lightning/src/ln/channelmanager.rs
@@ -470,7 +470,7 @@ impl MsgHandleErrInternal {
log_level: Level::Warn,
},
},
ChannelError::Ignore(msg) => LightningError {
ChannelError::Ignore(msg) | ChannelError::Retry(msg) => LightningError {
err: msg,
action: msgs::ErrorAction::IgnoreError,
},
@@ -663,6 +663,15 @@ impl_writeable_tlv_based_enum!(RAAMonitorUpdateBlockingAction,
(0, ForwardedPaymentInboundClaim) => { (0, channel_id, required), (2, htlc_id, required) }
;);

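/// An operation on a channel that could not be completed because the channel signer was
/// unavailable, recorded so that `ChannelManager::retry_channel` can replay it once the
/// signer is available again.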
pub(super) enum ChannelRetryState {
FundingCreated(msgs::FundingCreated),
FundingSigned(msgs::FundingSigned),
ChannelReestablish(msgs::ChannelReestablish),
CommitmentSigned(msgs::CommitmentSigned),
CompleteAcceptingInboundV1Channel(),
CompleteCreatingOutboundV1Channel(),
}


/// State we hold per-peer.
pub(super) struct PeerState<SP: Deref> where SP::Target: SignerProvider {
@@ -689,6 +698,8 @@ pub(super) struct PeerState<SP: Deref> where SP::Target: SignerProvider {
/// removed, and an InboundV1Channel is created and placed in the `inbound_v1_channel_by_id` table. If
/// the channel is rejected, then the entry is simply removed.
pub(super) inbound_channel_request_by_id: HashMap<ChannelId, InboundChannelRequest>,
/// Channel operations (typically inbound messages) that failed with `ChannelError::Retry`,
/// keyed by channel ID so that `ChannelManager::retry_channel` can replay them once the
/// signer is available again.
pub(super) retry_state_by_id: HashMap<ChannelId, ChannelRetryState>,
/// The latest `InitFeatures` we heard from the peer.
latest_features: InitFeatures,
/// Messages to send to the peer - pushed to in the same lock that they are generated in (except
@@ -1812,7 +1823,7 @@ macro_rules! convert_chan_err {
ChannelError::Warn(msg) => {
(false, MsgHandleErrInternal::from_chan_no_close(ChannelError::Warn(msg), $channel_id.clone()))
},
ChannelError::Ignore(msg) => {
ChannelError::Ignore(msg) | ChannelError::Retry(msg) => {
(false, MsgHandleErrInternal::from_chan_no_close(ChannelError::Ignore(msg), $channel_id.clone()))
},
ChannelError::Close(msg) => {
@@ -1828,7 +1839,7 @@
match $err {
// We should only ever have `ChannelError::Close` when unfunded channels error.
// In any case, just close the channel.
ChannelError::Warn(msg) | ChannelError::Ignore(msg) | ChannelError::Close(msg) => {
ChannelError::Warn(msg) | ChannelError::Ignore(msg) | ChannelError::Retry(msg) | ChannelError::Close(msg) => {
log_error!($self.logger, "Closing unfunded channel {} due to an error: {}", &$channel_id, msg);
update_maps_on_chan_removal!($self, &$channel_context);
let shutdown_res = $channel_context.force_shutdown(false);
@@ -2286,9 +2297,18 @@
},
}
};
let res = channel.get_open_channel(self.genesis_hash.clone());

let temporary_channel_id = channel.context.channel_id();
if channel.context.has_first_holder_per_commitment_point() {
peer_state.pending_msg_events.push(events::MessageSendEvent::SendOpenChannel {
node_id: their_network_key,
msg: channel.get_open_channel(self.genesis_hash.clone()),
});
} else {
log_info!(self.logger, "First per-commitment point is not available for {temporary_channel_id}, scheduling retry.");
peer_state.retry_state_by_id.insert(temporary_channel_id.clone(), ChannelRetryState::CompleteCreatingOutboundV1Channel());
}

match peer_state.outbound_v1_channel_by_id.entry(temporary_channel_id) {
hash_map::Entry::Occupied(_) => {
if cfg!(fuzzing) {
@@ -2300,13 +2320,35 @@
hash_map::Entry::Vacant(entry) => { entry.insert(channel); }
}

peer_state.pending_msg_events.push(events::MessageSendEvent::SendOpenChannel {
node_id: their_network_key,
msg: res,
});
Ok(temporary_channel_id)
}

fn complete_creating_outbound_v1_channel(&self, counterparty_node_id: &PublicKey, temporary_channel_id: &ChannelId) {
log_info!(self.logger, "Retrying create for channel {temporary_channel_id}");
self.with_mut_peer_state(counterparty_node_id, |peer_state: &mut PeerState<SP>| {
let channel_opt = peer_state.outbound_v1_channel_by_id.get_mut(temporary_channel_id);
if channel_opt.is_none() {
log_error!(self.logger, "Cannot find outbound channel with temporary ID {temporary_channel_id} for which to complete creation");
return;
}

log_info!(self.logger, "Found outbound channel with temporary ID {temporary_channel_id}");

let channel = channel_opt.unwrap();
if channel.set_first_holder_per_commitment_point().is_err() {
log_error!(self.logger, "Commitment point for outbound channel with temporary ID {temporary_channel_id} was not available on retry");
return;
}

log_info!(self.logger, "Set first per-commitment point for outbound channel {temporary_channel_id}");

peer_state.pending_msg_events.push(events::MessageSendEvent::SendOpenChannel {
node_id: *counterparty_node_id,
msg: channel.get_open_channel(self.genesis_hash.clone()),
});
});
}

fn list_funded_channels_with_filter<Fn: FnMut(&(&ChannelId, &Channel<SP>)) -> bool + Copy>(&self, f: Fn) -> Vec<ChannelDetails> {
// Allocate our best estimate of the number of channels we have in the `res`
// Vec. Sadly the `short_to_chan_info` map doesn't cover channels without
@@ -5423,16 +5465,66 @@
let outbound_scid_alias = self.create_and_insert_outbound_scid_alias();
channel.context.set_outbound_scid_alias(outbound_scid_alias);

peer_state.pending_msg_events.push(events::MessageSendEvent::SendAcceptChannel {
node_id: channel.context.get_counterparty_node_id(),
msg: channel.accept_inbound_channel(),
});
// If the channel context has the first commitment point, then we can immediately complete the
// channel acceptance by sending the accept_channel message. Otherwise, we'll need to wait for
// the signer to assign us the first commitment point.
if channel.context.has_first_holder_per_commitment_point() {
peer_state.pending_msg_events.push(events::MessageSendEvent::SendAcceptChannel {
node_id: channel.context.get_counterparty_node_id(),
msg: channel.accept_inbound_channel(),
});
} else {
log_info!(self.logger, "First per-commitment point is not available for {temporary_channel_id}, scheduling retry.");
peer_state.retry_state_by_id.insert(*temporary_channel_id, ChannelRetryState::CompleteAcceptingInboundV1Channel());
}

peer_state.inbound_v1_channel_by_id.insert(temporary_channel_id.clone(), channel);

Ok(())
}

/// Performs `f` with the mutable peer state for the specified counterparty node.
///
/// Returns `None` if no such counterparty node exists; otherwise, returns the result of `f`
/// wrapped in `Some`.
fn with_mut_peer_state<Out, Fn>(&self, counterparty_node_id: &PublicKey, f: Fn) -> Option<Out>
where Fn: FnOnce(&mut PeerState<SP>) -> Out
{
let per_peer_state = self.per_peer_state.read().unwrap();
let peer_state_mutex = per_peer_state.get(counterparty_node_id)?;
let mut peer_state_lock = peer_state_mutex.lock().unwrap();
Some(f(&mut *peer_state_lock))
}

/// Retries an `InboundV1Channel` accept.
///
/// Attempts to resolve the first per-commitment point, and if successful broadcasts the
/// `accept_channel` message to the peer.
fn retry_accepting_inbound_v1_channel(&self, counterparty_node_id: &PublicKey, temporary_channel_id: &ChannelId) {
log_info!(self.logger, "Retrying accept for channel {temporary_channel_id}");
self.with_mut_peer_state(counterparty_node_id, |peer_state: &mut PeerState<SP>| {
let channel_opt = peer_state.inbound_v1_channel_by_id.get_mut(temporary_channel_id);
if channel_opt.is_none() {
log_error!(self.logger, "Cannot find channel with temporary ID {temporary_channel_id} for which to complete accepting");
return;
}

log_info!(self.logger, "Found channel with temporary ID {temporary_channel_id}");

let channel = channel_opt.unwrap();
if channel.set_first_holder_per_commitment_point().is_err() {
log_error!(self.logger, "Commitment point for channel with temporary ID {temporary_channel_id} was not available on retry");
return;
}

log_info!(self.logger, "Set first per-commitment point for channel {temporary_channel_id}");

peer_state.pending_msg_events.push(events::MessageSendEvent::SendAcceptChannel {
node_id: channel.context.get_counterparty_node_id(),
msg: channel.accept_inbound_channel(),
});
});
}

/// Gets the number of peers which match the given filter and do not have any funded, outbound,
/// or 0-conf channels.
///
@@ -5622,6 +5714,13 @@ where
Some(inbound_chan) => {
match inbound_chan.funding_created(msg, best_block, &self.signer_provider, &self.logger) {
Ok(res) => res,
Err((inbound_chan, err @ ChannelError::Retry(_))) => {
// If we get a `Retry` error then something transient went wrong (e.g. the signer could not
// yet produce the first commitment point). Put the channel back into the table, record the
// message in `retry_state_by_id`, and bail; `ChannelManager::retry_channel` will replay it
// once the signer is available again.
Member:
when does this get retried (would be good to doc here)?

Contributor Author:
I am assuming that there will be a to-be-determined mechanism that allows the remote signer's implementation to initiate a restart.

peer_state.inbound_v1_channel_by_id.insert(msg.temporary_channel_id, inbound_chan);
peer_state.retry_state_by_id.insert(msg.temporary_channel_id, ChannelRetryState::FundingCreated(msg.clone()));
return Err(MsgHandleErrInternal::from_chan_no_close(err, msg.temporary_channel_id));
},
Err((mut inbound_chan, err)) => {
// We've already removed this inbound channel from the map in `PeerState`
// above so at this point we just need to clean up any lingering entries
@@ -5698,8 +5797,13 @@
let peer_state = &mut *peer_state_lock;
match peer_state.channel_by_id.entry(msg.channel_id) {
hash_map::Entry::Occupied(mut chan) => {
let monitor = try_chan_entry!(self,
chan.get_mut().funding_signed(&msg, best_block, &self.signer_provider, &self.logger), chan);
let res = chan.get_mut().funding_signed(&msg, best_block, &self.signer_provider, &self.logger);
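// A `Retry` error means the signer could not yet produce what we needed; stash the message
// so that `retry_channel` can replay it once the signer is available.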
if let Err(ref err) = &res {
if let ChannelError::Retry(_) = err {
peer_state.retry_state_by_id.insert(msg.channel_id, ChannelRetryState::FundingSigned(msg.clone()));
}
}
let monitor = try_chan_entry!(self, res, chan);
let update_res = self.chain_monitor.watch_channel(chan.get().context.get_funding_txo().unwrap(), monitor);
let mut res = handle_new_monitor_update!(self, update_res, peer_state_lock, peer_state, per_peer_state, chan, INITIAL_MONITOR);
if let Err(MsgHandleErrInternal { ref mut shutdown_finish, .. }) = res {
@@ -6008,7 +6112,13 @@
match peer_state.channel_by_id.entry(msg.channel_id) {
hash_map::Entry::Occupied(mut chan) => {
let funding_txo = chan.get().context.get_funding_txo();
let monitor_update_opt = try_chan_entry!(self, chan.get_mut().commitment_signed(&msg, &self.logger), chan);
let res = chan.get_mut().commitment_signed(&msg, &self.logger);
if let Err(ref err) = &res {
if let ChannelError::Retry(_) = err {
peer_state.retry_state_by_id.insert(msg.channel_id, ChannelRetryState::CommitmentSigned(msg.clone()));
}
}
let monitor_update_opt = try_chan_entry!(self, res, chan);
if let Some(monitor_update) = monitor_update_opt {
handle_new_monitor_update!(self, funding_txo.unwrap(), monitor_update, peer_state_lock,
peer_state, per_peer_state, chan).map(|_| ())
@@ -6280,9 +6390,15 @@
// disconnect, so Channel's reestablish will never hand us any holding cell
// freed HTLCs to fail backwards. If in the future we no longer drop pending
// add-HTLCs on disconnect, we may be handed HTLCs to fail backwards here.
let responses = try_chan_entry!(self, chan.get_mut().channel_reestablish(
let res = chan.get_mut().channel_reestablish(
msg, &self.logger, &self.node_signer, self.genesis_hash,
&self.default_configuration, &*self.best_block.read().unwrap()), chan);
&self.default_configuration, &*self.best_block.read().unwrap());
if let Err(ref err) = &res {
if let ChannelError::Retry(_) = err {
peer_state.retry_state_by_id.insert(msg.channel_id, ChannelRetryState::ChannelReestablish(msg.clone()));
}
}
let responses = try_chan_entry!(self, res, chan);
let mut channel_update = None;
if let Some(msg) = responses.shutdown_msg {
peer_state.pending_msg_events.push(events::MessageSendEvent::SendShutdown {
@@ -6830,6 +6946,42 @@
let mut ev;
process_events_body!(self, ev, { handler(ev).await });
}

/// Resumes processing for a channel that is waiting on its `ChannelSigner`.
///
/// If the `ChannelSigner` for a channel has previously returned an `Err`, activity on the
/// channel will have been suspended pending the required material (such as a per-commitment
/// point or a signature) becoming available. Calling this replays the suspended operation.
pub fn retry_channel(&self, channel_id: &ChannelId, counterparty_node_id: &PublicKey) -> Result<(), APIError> {
log_info!(self.logger, "Running retry_channel for {channel_id} on {counterparty_node_id}");
let retry_state = {
let per_peer_state = self.per_peer_state.read().unwrap();
let peer_state_mutex = per_peer_state.get(counterparty_node_id)
.ok_or(APIError::APIMisuseError {
err: format!("Can't find a peer matching the passed counterparty node_id {}", counterparty_node_id)
})?;

let mut peer_state_lock = peer_state_mutex.lock().unwrap();
let peer_state = &mut *peer_state_lock;
match peer_state.retry_state_by_id.remove(channel_id) {
Some(retry_state) => Ok(retry_state),
None => Err(APIError::APIMisuseError {
err: format!("Can't find a channel matching the passed channel_id {}", channel_id)
}),
}
}?;

match retry_state {
ChannelRetryState::FundingCreated(ref msg) => self.handle_funding_created(counterparty_node_id, msg),
ChannelRetryState::FundingSigned(ref msg) => self.handle_funding_signed(counterparty_node_id, msg),
ChannelRetryState::ChannelReestablish(ref msg) => self.handle_channel_reestablish(counterparty_node_id, msg),
ChannelRetryState::CommitmentSigned(ref msg) => self.handle_commitment_signed(counterparty_node_id, msg),
ChannelRetryState::CompleteAcceptingInboundV1Channel() => self.retry_accepting_inbound_v1_channel(counterparty_node_id, channel_id),
ChannelRetryState::CompleteCreatingOutboundV1Channel() => self.complete_creating_outbound_v1_channel(counterparty_node_id, channel_id),
};

Ok(())
}
}
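For illustration, the expected calling pattern (a sketch; how the application learns that its signer can serve requests again is out of scope here, per the review discussion above):

// Application-side sketch: once the (remote) signer is reachable again, replay whatever was
// parked for this channel. Assumes the application recorded the channel id and counterparty
// when the original operation was suspended.
channel_manager.retry_channel(&channel_id, &counterparty_node_id)?;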

impl<M: Deref, T: Deref, ES: Deref, NS: Deref, SP: Deref, F: Deref, R: Deref, L: Deref> MessageSendEventsProvider for ChannelManager<M, T, ES, NS, SP, F, R, L>
@@ -7504,6 +7656,7 @@
outbound_v1_channel_by_id: HashMap::new(),
inbound_v1_channel_by_id: HashMap::new(),
inbound_channel_request_by_id: HashMap::new(),
retry_state_by_id: HashMap::new(),
latest_features: init_msg.features.clone(),
pending_msg_events: Vec::new(),
in_flight_monitor_updates: BTreeMap::new(),
@@ -8752,6 +8905,7 @@
outbound_v1_channel_by_id: HashMap::new(),
inbound_v1_channel_by_id: HashMap::new(),
inbound_channel_request_by_id: HashMap::new(),
retry_state_by_id: HashMap::new(),
latest_features: InitFeatures::empty(),
pending_msg_events: Vec::new(),
in_flight_monitor_updates: BTreeMap::new(),
34 changes: 31 additions & 3 deletions lightning/src/ln/functional_test_utils.rs
@@ -29,6 +29,8 @@ use crate::util::test_utils;
use crate::util::test_utils::{panicking, TestChainMonitor, TestScorer, TestKeysInterface};
use crate::util::errors::APIError;
use crate::util::config::{UserConfig, MaxDustHTLCExposure};
#[cfg(test)]
use crate::util::logger::Logger;
use crate::util::ser::{ReadableArgs, Writeable};

use bitcoin::blockdata::block::{Block, BlockHeader};
@@ -414,6 +416,31 @@
pub fn get_block_header(&self, height: u32) -> BlockHeader {
self.blocks.lock().unwrap()[height as usize].0.header
}
/// Changes the channel signer's availability for the specified peer and channel.
///
/// When `available` is set to `true`, the channel signer will behave normally. When set to
/// `false`, the channel signer will act like an off-line remote signer and will return `Err` for
/// several of the signing methods. Currently, only `get_per_commitment_point` and
/// `release_commitment_secret` are affected by this setting.
#[cfg(test)]
pub fn set_channel_signer_available(&self, peer_id: &PublicKey, chan_id: &ChannelId, available: bool) {
let per_peer_state = self.node.per_peer_state.read().unwrap();
let chan_lock = per_peer_state.get(peer_id).unwrap().lock().unwrap();
let signer = (|| {
if let Some(local_chan) = chan_lock.channel_by_id.get(chan_id) {
return local_chan.get_signer();
}
if let Some(local_chan) = chan_lock.inbound_v1_channel_by_id.get(chan_id) {
return local_chan.context.get_signer();
}
if let Some(local_chan) = chan_lock.outbound_v1_channel_by_id.get(chan_id) {
return local_chan.context.get_signer();
}
panic!("Couldn't find a channel with id {}", chan_id);
})();
log_debug!(self.logger, "Setting channel {} as available={}", chan_id, available);
signer.as_ecdsa().unwrap().set_available(available);
}
}
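A sketch of how a functional test might combine this helper with `ChannelManager::retry_channel` (assuming the usual `nodes` fixture, a `funding_created` message already in hand, and the relevant temporary channel id; the actual tests accompanying this PR are not shown in this excerpt):

// Make node 1's signer act offline for this channel, deliver a message whose handling needs
// get_per_commitment_point (it is parked as ChannelRetryState), then bring the signer back
// and replay the parked work. `chan_id` and `funding_created_msg` come from the surrounding
// test and are assumptions here.
nodes[1].set_channel_signer_available(&nodes[0].node.get_our_node_id(), &chan_id, false);
nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &funding_created_msg);
nodes[1].set_channel_signer_available(&nodes[0].node.get_our_node_id(), &chan_id, true);
nodes[1].node.retry_channel(&chan_id, &nodes[0].node.get_our_node_id()).unwrap();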

/// If we need an unsafe pointer to a `Node` (ie to reference it in a thread
@@ -2031,12 +2058,13 @@
}

#[cfg(any(test, ldk_bench, feature = "_test_utils"))]
pub fn expect_channel_pending_event<'a, 'b, 'c, 'd>(node: &'a Node<'b, 'c, 'd>, expected_counterparty_node_id: &PublicKey) {
pub fn expect_channel_pending_event<'a, 'b, 'c, 'd>(node: &'a Node<'b, 'c, 'd>, expected_counterparty_node_id: &PublicKey) -> ChannelId {
let events = node.node.get_and_clear_pending_events();
assert_eq!(events.len(), 1);
match events[0] {
crate::events::Event::ChannelPending { ref counterparty_node_id, .. } => {
match &events[0] {
crate::events::Event::ChannelPending { channel_id, counterparty_node_id, .. } => {
assert_eq!(*expected_counterparty_node_id, *counterparty_node_id);
*channel_id
},
_ => panic!("Unexpected event"),
}