
Commit 281a0ae

Merge pull request #2558 from waterson/pr-2554

Handle retrying sign_counterparty_commitment failures

2 parents d795e24 + 014a336, commit 281a0ae
8 files changed: +687 −130 lines
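In brief: when an asynchronous or remote signer cannot produce a signature immediately, signing methods such as sign_counterparty_commitment now return Err, the channel records the outgoing message as signer-pending, and the message is regenerated once the signer recovers. A minimal self-contained sketch of that retry shape (toy types for illustration, not LDK APIs):

    use std::sync::{Arc, Mutex};

    // Toy model of the pattern in this merge: a signer that can be
    // unavailable, and a channel that remembers it owes a message until
    // the signer is unblocked.
    struct ToySigner { available: Arc<Mutex<bool>> }

    impl ToySigner {
        fn sign_counterparty_commitment(&self, tx: &str) -> Result<String, ()> {
            if !*self.available.lock().unwrap() { return Err(()); }
            Ok(format!("sig({})", tx))
        }
    }

    struct ToyChannel {
        signer: ToySigner,
        // Set when a signature was requested but the signer was unavailable.
        signer_pending_commitment: Option<String>,
    }

    impl ToyChannel {
        // First attempt: on failure, queue the message as signer-pending
        // instead of erroring the channel.
        fn send_commitment(&mut self, tx: &str) -> Option<String> {
            match self.signer.sign_counterparty_commitment(tx) {
                Ok(sig) => Some(sig),
                Err(()) => { self.signer_pending_commitment = Some(tx.to_owned()); None }
            }
        }
        // Retry hook, analogous to ChannelManager::signer_unblocked below.
        fn signer_maybe_unblocked(&mut self) -> Option<String> {
            let tx = self.signer_pending_commitment.take()?;
            match self.signer.sign_counterparty_commitment(&tx) {
                Ok(sig) => Some(sig),
                Err(()) => { self.signer_pending_commitment = Some(tx); None }
            }
        }
    }

    fn main() {
        let available = Arc::new(Mutex::new(false));
        let mut chan = ToyChannel {
            signer: ToySigner { available: available.clone() },
            signer_pending_commitment: None,
        };
        assert!(chan.send_commitment("commitment_tx_1").is_none()); // queued
        *available.lock().unwrap() = true; // signer comes back
        assert_eq!(chan.signer_maybe_unblocked().unwrap(), "sig(commitment_tx_1)");
    }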

fuzz/src/chanmon_consistency.rs (+1)

@@ -267,6 +267,7 @@ impl SignerProvider for KeyProvider {
             inner,
             state,
             disable_revocation_policy_check: false,
+            available: Arc::new(Mutex::new(true)),
         })
     }

lightning/src/ln/async_signer_tests.rs (+323)

Large diffs are not rendered by default.

lightning/src/ln/channel.rs (+213 −105)

Large diffs are not rendered by default.
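The channel.rs diff is not rendered, but the signer_unblocked hunk in channelmanager.rs below consumes its new signer_maybe_unblocked API, so the shape of its return value can be inferred from the fields used there. A sketch (field names grounded in the usage below; the struct name and exact types are assumptions, not the rendered source):

    // Inferred from usage in ChannelManager::signer_unblocked below; the
    // actual name and definition in channel.rs are assumptions, since that
    // diff is not rendered here.
    pub struct SignerResumeUpdates {
        pub commitment_update: Option<msgs::CommitmentUpdate>,
        pub funding_signed: Option<msgs::FundingSigned>,
        pub funding_created: Option<msgs::FundingCreated>,
        pub channel_ready: Option<msgs::ChannelReady>,
    }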

lightning/src/ln/channelmanager.rs (+85 −14)

@@ -3802,7 +3802,7 @@ where

         let mut peer_state_lock = peer_state_mutex.lock().unwrap();
         let peer_state = &mut *peer_state_lock;
-        let (chan, msg) = match peer_state.channel_by_id.remove(temporary_channel_id) {
+        let (chan, msg_opt) = match peer_state.channel_by_id.remove(temporary_channel_id) {
             Some(ChannelPhase::UnfundedOutboundV1(chan)) => {
                 let funding_txo = find_funding_output(&chan, &funding_transaction)?;

@@ -3841,10 +3841,12 @@ where
             }),
         };

-        peer_state.pending_msg_events.push(events::MessageSendEvent::SendFundingCreated {
-            node_id: chan.context.get_counterparty_node_id(),
-            msg,
-        });
+        if let Some(msg) = msg_opt {
+            peer_state.pending_msg_events.push(events::MessageSendEvent::SendFundingCreated {
+                node_id: chan.context.get_counterparty_node_id(),
+                msg,
+            });
+        }
         match peer_state.channel_by_id.entry(chan.context.channel_id()) {
             hash_map::Entry::Occupied(_) => {
                 panic!("Generated duplicate funding txid?");
@@ -6229,7 +6231,7 @@ where

         let mut peer_state_lock = peer_state_mutex.lock().unwrap();
         let peer_state = &mut *peer_state_lock;
-        let (chan, funding_msg, monitor) =
+        let (chan, funding_msg_opt, monitor) =
             match peer_state.channel_by_id.remove(&msg.temporary_channel_id) {
                 Some(ChannelPhase::UnfundedInboundV1(inbound_chan)) => {
                     match inbound_chan.funding_created(msg, best_block, &self.signer_provider, &self.logger) {
@@ -6252,17 +6254,20 @@ where
             None => return Err(MsgHandleErrInternal::send_err_msg_no_close(format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", counterparty_node_id), msg.temporary_channel_id))
         };

-        match peer_state.channel_by_id.entry(funding_msg.channel_id) {
+        match peer_state.channel_by_id.entry(chan.context.channel_id()) {
             hash_map::Entry::Occupied(_) => {
-                Err(MsgHandleErrInternal::send_err_msg_no_close("Already had channel with the new channel_id".to_owned(), funding_msg.channel_id))
+                Err(MsgHandleErrInternal::send_err_msg_no_close(
+                    "Already had channel with the new channel_id".to_owned(),
+                    chan.context.channel_id()
+                ))
             },
             hash_map::Entry::Vacant(e) => {
                 let mut id_to_peer_lock = self.id_to_peer.lock().unwrap();
                 match id_to_peer_lock.entry(chan.context.channel_id()) {
                     hash_map::Entry::Occupied(_) => {
                         return Err(MsgHandleErrInternal::send_err_msg_no_close(
                             "The funding_created message had the same funding_txid as an existing channel - funding is not possible".to_owned(),
-                            funding_msg.channel_id))
+                            chan.context.channel_id()))
                     },
                     hash_map::Entry::Vacant(i_e) => {
                         let monitor_res = self.chain_monitor.watch_channel(monitor.get_funding_txo().0, monitor);
@@ -6274,10 +6279,12 @@ where
                         // hasn't persisted to disk yet - we can't lose money on a transaction that we haven't
                         // accepted payment from yet. We do, however, need to wait to send our channel_ready
                         // until we have persisted our monitor.
-                        peer_state.pending_msg_events.push(events::MessageSendEvent::SendFundingSigned {
-                            node_id: counterparty_node_id.clone(),
-                            msg: funding_msg,
-                        });
+                        if let Some(msg) = funding_msg_opt {
+                            peer_state.pending_msg_events.push(events::MessageSendEvent::SendFundingSigned {
+                                node_id: counterparty_node_id.clone(),
+                                msg,
+                            });
+                        }

                         if let ChannelPhase::Funded(chan) = e.insert(ChannelPhase::Funded(chan)) {
                             handle_new_monitor_update!(self, persist_state, peer_state_lock, peer_state,
@@ -6288,9 +6295,13 @@ where
                             Ok(())
                         } else {
                             log_error!(self.logger, "Persisting initial ChannelMonitor failed, implying the funding outpoint was duplicated");
+                            let channel_id = match funding_msg_opt {
+                                Some(msg) => msg.channel_id,
+                                None => chan.context.channel_id(),
+                            };
                             return Err(MsgHandleErrInternal::send_err_msg_no_close(
                                 "The funding_created message had the same funding_txid as an existing channel - funding is not possible".to_owned(),
-                                funding_msg.channel_id));
+                                channel_id));
                         }
                     }
                 }
@@ -7216,6 +7227,66 @@ where
         has_update
     }

+    /// When a call to a [`ChannelSigner`] method returns an error, this indicates that the signer
+    /// is (temporarily) unavailable, and the operation should be retried later.
+    ///
+    /// This method allows for that retry - either checking for any signer-pending messages to be
+    /// attempted in every channel, or in the specifically provided channel.
+    ///
+    /// [`ChannelSigner`]: crate::sign::ChannelSigner
+    #[cfg(test)] // This is only implemented for one signer method, and should be private until we
+                 // actually finish implementing it fully.
+    pub fn signer_unblocked(&self, channel_opt: Option<(PublicKey, ChannelId)>) {
+        let _persistence_guard = PersistenceNotifierGuard::notify_on_drop(self);
+
+        let unblock_chan = |phase: &mut ChannelPhase<SP>, pending_msg_events: &mut Vec<MessageSendEvent>| {
+            let node_id = phase.context().get_counterparty_node_id();
+            if let ChannelPhase::Funded(chan) = phase {
+                let msgs = chan.signer_maybe_unblocked(&self.logger);
+                if let Some(updates) = msgs.commitment_update {
+                    pending_msg_events.push(events::MessageSendEvent::UpdateHTLCs {
+                        node_id,
+                        updates,
+                    });
+                }
+                if let Some(msg) = msgs.funding_signed {
+                    pending_msg_events.push(events::MessageSendEvent::SendFundingSigned {
+                        node_id,
+                        msg,
+                    });
+                }
+                if let Some(msg) = msgs.funding_created {
+                    pending_msg_events.push(events::MessageSendEvent::SendFundingCreated {
+                        node_id,
+                        msg,
+                    });
+                }
+                if let Some(msg) = msgs.channel_ready {
+                    send_channel_ready!(self, pending_msg_events, chan, msg);
+                }
+            }
+        };
+
+        let per_peer_state = self.per_peer_state.read().unwrap();
+        if let Some((counterparty_node_id, channel_id)) = channel_opt {
+            if let Some(peer_state_mutex) = per_peer_state.get(&counterparty_node_id) {
+                let mut peer_state_lock = peer_state_mutex.lock().unwrap();
+                let peer_state = &mut *peer_state_lock;
+                if let Some(chan) = peer_state.channel_by_id.get_mut(&channel_id) {
+                    unblock_chan(chan, &mut peer_state.pending_msg_events);
+                }
+            }
+        } else {
+            for (_cp_id, peer_state_mutex) in per_peer_state.iter() {
+                let mut peer_state_lock = peer_state_mutex.lock().unwrap();
+                let peer_state = &mut *peer_state_lock;
+                for (_, chan) in peer_state.channel_by_id.iter_mut() {
+                    unblock_chan(chan, &mut peer_state.pending_msg_events);
+                }
+            }
+        }
+    }
+
     /// Check whether any channels have finished removing all pending updates after a shutdown
     /// exchange and can now send a closing_signed.
     /// Returns whether any closing_signed messages were generated.
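The Option parameter gives the new method two call shapes, shown in this illustrative fragment (channel_manager, counterparty_node_id, and channel_id are placeholder bindings, not code from this diff):

    // Retry one channel's signer-pending messages...
    channel_manager.signer_unblocked(Some((counterparty_node_id, channel_id)));
    // ...or sweep every channel of every peer for them.
    channel_manager.signer_unblocked(None);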

lightning/src/ln/functional_test_utils.rs (+35 −8)

@@ -30,6 +30,8 @@ use crate::util::test_utils::{panicking, TestChainMonitor, TestScorer, TestKeysI
 use crate::util::errors::APIError;
 use crate::util::config::{UserConfig, MaxDustHTLCExposure};
 use crate::util::ser::{ReadableArgs, Writeable};
+#[cfg(test)]
+use crate::util::logger::Logger;

 use bitcoin::blockdata::block::{Block, BlockHeader};
 use bitcoin::blockdata::transaction::{Transaction, TxOut};
@@ -436,6 +438,25 @@ impl<'a, 'b, 'c> Node<'a, 'b, 'c> {
     pub fn get_block_header(&self, height: u32) -> BlockHeader {
         self.blocks.lock().unwrap()[height as usize].0.header
     }
+    /// Changes the channel signer's availability for the specified peer and channel.
+    ///
+    /// When `available` is set to `true`, the channel signer will behave normally. When set to
+    /// `false`, the channel signer will act like an off-line remote signer and will return `Err` for
+    /// several of the signing methods. Currently, only `get_per_commitment_point` and
+    /// `release_commitment_secret` are affected by this setting.
+    #[cfg(test)]
+    pub fn set_channel_signer_available(&self, peer_id: &PublicKey, chan_id: &ChannelId, available: bool) {
+        let per_peer_state = self.node.per_peer_state.read().unwrap();
+        let chan_lock = per_peer_state.get(peer_id).unwrap().lock().unwrap();
+        let signer = (|| {
+            match chan_lock.channel_by_id.get(chan_id) {
+                Some(phase) => phase.context().get_signer(),
+                None => panic!("Couldn't find a channel with id {}", chan_id),
+            }
+        })();
+        log_debug!(self.logger, "Setting channel signer for {} as available={}", chan_id, available);
+        signer.as_ecdsa().unwrap().set_available(available);
+    }
 }

 /// If we need an unsafe pointer to a `Node` (ie to reference it in a thread
@@ -924,7 +945,8 @@ macro_rules! unwrap_send_err {
 pub fn check_added_monitors<CM: AChannelManager, H: NodeHolder<CM=CM>>(node: &H, count: usize) {
     if let Some(chain_monitor) = node.chain_monitor() {
         let mut added_monitors = chain_monitor.added_monitors.lock().unwrap();
-        assert_eq!(added_monitors.len(), count);
+        let n = added_monitors.len();
+        assert_eq!(n, count, "expected {} monitors to be added, not {}", count, n);
         added_monitors.clear();
     }
 }
@@ -2119,12 +2141,13 @@ macro_rules! expect_channel_shutdown_state {
 }

 #[cfg(any(test, ldk_bench, feature = "_test_utils"))]
-pub fn expect_channel_pending_event<'a, 'b, 'c, 'd>(node: &'a Node<'b, 'c, 'd>, expected_counterparty_node_id: &PublicKey) {
+pub fn expect_channel_pending_event<'a, 'b, 'c, 'd>(node: &'a Node<'b, 'c, 'd>, expected_counterparty_node_id: &PublicKey) -> ChannelId {
     let events = node.node.get_and_clear_pending_events();
     assert_eq!(events.len(), 1);
-    match events[0] {
-        crate::events::Event::ChannelPending { ref counterparty_node_id, .. } => {
+    match &events[0] {
+        crate::events::Event::ChannelPending { channel_id, counterparty_node_id, .. } => {
             assert_eq!(*expected_counterparty_node_id, *counterparty_node_id);
+            *channel_id
         },
         _ => panic!("Unexpected event"),
     }
@@ -3232,24 +3255,28 @@ pub fn reconnect_nodes<'a, 'b, 'c, 'd>(args: ReconnectArgs<'a, 'b, 'c, 'd>) {
         // If a expects a channel_ready, it better not think it has received a revoke_and_ack
         // from b
         for reestablish in reestablish_1.iter() {
-            assert_eq!(reestablish.next_remote_commitment_number, 0);
+            let n = reestablish.next_remote_commitment_number;
+            assert_eq!(n, 0, "expected a->b next_remote_commitment_number to be 0, got {}", n);
         }
     }
     if send_channel_ready.1 {
         // If b expects a channel_ready, it better not think it has received a revoke_and_ack
         // from a
         for reestablish in reestablish_2.iter() {
-            assert_eq!(reestablish.next_remote_commitment_number, 0);
+            let n = reestablish.next_remote_commitment_number;
+            assert_eq!(n, 0, "expected b->a next_remote_commitment_number to be 0, got {}", n);
         }
     }
     if send_channel_ready.0 || send_channel_ready.1 {
         // If we expect any channel_ready's, both sides better have set
         // next_holder_commitment_number to 1
         for reestablish in reestablish_1.iter() {
-            assert_eq!(reestablish.next_local_commitment_number, 1);
+            let n = reestablish.next_local_commitment_number;
+            assert_eq!(n, 1, "expected a->b next_local_commitment_number to be 1, got {}", n);
         }
         for reestablish in reestablish_2.iter() {
-            assert_eq!(reestablish.next_local_commitment_number, 1);
+            let n = reestablish.next_local_commitment_number;
+            assert_eq!(n, 1, "expected b->a next_local_commitment_number to be 1, got {}", n);
         }
     }
lightning/src/ln/functional_tests.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -9045,7 +9045,7 @@ fn test_duplicate_chan_id() {
90459045
}
90469046
};
90479047
check_added_monitors!(nodes[0], 0);
9048-
nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &funding_created);
9048+
nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &funding_created.unwrap());
90499049
// At this point we'll look up if the channel_id is present and immediately fail the channel
90509050
// without trying to persist the `ChannelMonitor`.
90519051
check_added_monitors!(nodes[1], 0);

lightning/src/ln/mod.rs (+3)

@@ -73,6 +73,9 @@ mod monitor_tests;
 #[cfg(test)]
 #[allow(unused_mut)]
 mod shutdown_tests;
+#[cfg(test)]
+#[allow(unused_mut)]
+mod async_signer_tests;

 pub use self::peer_channel_encryptor::LN_MAX_MSG_LEN;

lightning/src/util/test_channel_signer.rs

+26-2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ pub struct TestChannelSigner {
5656
/// Channel state used for policy enforcement
5757
pub state: Arc<Mutex<EnforcementState>>,
5858
pub disable_revocation_policy_check: bool,
59+
/// When `true` (the default), the signer will respond immediately with signatures. When `false`,
60+
/// the signer will return an error indicating that it is unavailable.
61+
pub available: Arc<Mutex<bool>>,
5962
}
6063

6164
impl PartialEq for TestChannelSigner {
@@ -71,7 +74,8 @@ impl TestChannelSigner {
7174
Self {
7275
inner,
7376
state,
74-
disable_revocation_policy_check: false
77+
disable_revocation_policy_check: false,
78+
available: Arc::new(Mutex::new(true)),
7579
}
7680
}
7781

@@ -84,7 +88,8 @@ impl TestChannelSigner {
8488
Self {
8589
inner,
8690
state,
87-
disable_revocation_policy_check
91+
disable_revocation_policy_check,
92+
available: Arc::new(Mutex::new(true)),
8893
}
8994
}
9095

@@ -94,6 +99,16 @@ impl TestChannelSigner {
9499
pub fn get_enforcement_state(&self) -> MutexGuard<EnforcementState> {
95100
self.state.lock().unwrap()
96101
}
102+
103+
/// Marks the signer's availability.
104+
///
105+
/// When `true`, methods are forwarded to the underlying signer as normal. When `false`, some
106+
/// methods will return `Err` indicating that the signer is unavailable. Intended to be used for
107+
/// testing asynchronous signing.
108+
#[cfg(test)]
109+
pub fn set_available(&self, available: bool) {
110+
*self.available.lock().unwrap() = available;
111+
}
97112
}
98113

99114
impl ChannelSigner for TestChannelSigner {
@@ -133,6 +148,9 @@ impl EcdsaChannelSigner for TestChannelSigner {
133148
self.verify_counterparty_commitment_tx(commitment_tx, secp_ctx);
134149

135150
{
151+
if !*self.available.lock().unwrap() {
152+
return Err(());
153+
}
136154
let mut state = self.state.lock().unwrap();
137155
let actual_commitment_number = commitment_tx.commitment_number();
138156
let last_commitment_number = state.last_counterparty_commitment;
@@ -149,13 +167,19 @@ impl EcdsaChannelSigner for TestChannelSigner {
149167
}
150168

151169
fn validate_counterparty_revocation(&self, idx: u64, _secret: &SecretKey) -> Result<(), ()> {
170+
if !*self.available.lock().unwrap() {
171+
return Err(());
172+
}
152173
let mut state = self.state.lock().unwrap();
153174
assert!(idx == state.last_counterparty_revoked_commitment || idx == state.last_counterparty_revoked_commitment - 1, "expecting to validate the current or next counterparty revocation - trying {}, current {}", idx, state.last_counterparty_revoked_commitment);
154175
state.last_counterparty_revoked_commitment = idx;
155176
Ok(())
156177
}
157178

158179
fn sign_holder_commitment(&self, commitment_tx: &HolderCommitmentTransaction, secp_ctx: &Secp256k1<secp256k1::All>) -> Result<Signature, ()> {
180+
if !*self.available.lock().unwrap() {
181+
return Err(());
182+
}
159183
let trusted_tx = self.verify_holder_commitment_tx(commitment_tx, secp_ctx);
160184
let state = self.state.lock().unwrap();
161185
let commitment_number = trusted_tx.commitment_number();
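A note on the design: available is an Arc<Mutex<bool>> rather than a plain bool so that every clone of the signer shares one flag, letting a test hold one handle and flip availability while the channel under test holds another. A self-contained illustration (toy type, not the LDK signer):

    use std::sync::{Arc, Mutex};

    #[derive(Clone)]
    struct GatedSigner { available: Arc<Mutex<bool>> }

    impl GatedSigner {
        // Refuses to sign while the shared flag is false, mirroring the
        // gating added to TestChannelSigner above.
        fn sign(&self, msg: &str) -> Result<String, ()> {
            if !*self.available.lock().unwrap() { return Err(()); }
            Ok(format!("signed:{}", msg))
        }
        fn set_available(&self, available: bool) {
            *self.available.lock().unwrap() = available;
        }
    }

    fn main() {
        let test_handle = GatedSigner { available: Arc::new(Mutex::new(true)) };
        let channel_copy = test_handle.clone(); // shares the same flag
        test_handle.set_available(false);
        assert!(channel_copy.sign("commitment").is_err());
        test_handle.set_available(true);
        assert_eq!(channel_copy.sign("commitment").unwrap(), "signed:commitment");
    }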
