Skip to content

Commit 06eddc3

Browse files
authored
Merge pull request #320 from TheBlueMatt/2019-03-chan-send-rewrite
Rewrite Channel resend tracking to make it much more reliable
2 parents 2811b07 + c2a3fc7 commit 06eddc3

File tree

3 files changed

+78
-73
lines changed

3 files changed

+78
-73
lines changed

fuzz/fuzz_targets/chanmon_fail_consistency.rs

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ use utils::test_logger;
4949
use secp256k1::key::{PublicKey,SecretKey};
5050
use secp256k1::Secp256k1;
5151

52+
use std::cmp::Ordering;
53+
use std::collections::HashSet;
5254
use std::sync::{Arc,Mutex};
5355
use std::io::Cursor;
5456

@@ -435,13 +437,36 @@ pub fn do_test(data: &[u8]) {
435437

436438
macro_rules! process_events {
437439
($node: expr, $fail: expr) => { {
438-
for event in nodes[$node].get_and_clear_pending_events() {
440+
// In case we get 256 payments we may have a hash collision, resulting in the
441+
// second claim/fail call not finding the duplicate-hash HTLC, so we have to
442+
// deduplicate the calls here.
443+
let mut claim_set = HashSet::new();
444+
let mut events = nodes[$node].get_and_clear_pending_events();
445+
// Sort events so that PendingHTLCsForwardable get processed last. This avoids a
446+
// case where we first process a PendingHTLCsForwardable, then claim/fail on a
447+
// PaymentReceived, claiming/failing two HTLCs, but leaving a just-generated
448+
// PaymentReceived event for the second HTLC in our pending_events (and breaking
449+
// our claim_set deduplication).
450+
events.sort_by(|a, b| {
451+
if let events::Event::PaymentReceived { .. } = a {
452+
if let events::Event::PendingHTLCsForwardable { .. } = b {
453+
Ordering::Less
454+
} else { Ordering::Equal }
455+
} else if let events::Event::PendingHTLCsForwardable { .. } = a {
456+
if let events::Event::PaymentReceived { .. } = b {
457+
Ordering::Greater
458+
} else { Ordering::Equal }
459+
} else { Ordering::Equal }
460+
});
461+
for event in events.drain(..) {
439462
match event {
440463
events::Event::PaymentReceived { payment_hash, .. } => {
441-
if $fail {
442-
assert!(nodes[$node].fail_htlc_backwards(&payment_hash));
443-
} else {
444-
assert!(nodes[$node].claim_funds(PaymentPreimage(payment_hash.0)));
464+
if claim_set.insert(payment_hash.0) {
465+
if $fail {
466+
assert!(nodes[$node].fail_htlc_backwards(&payment_hash));
467+
} else {
468+
assert!(nodes[$node].claim_funds(PaymentPreimage(payment_hash.0)));
469+
}
445470
}
446471
},
447472
events::Event::PaymentSent { .. } => {},

src/ln/channel.rs

Lines changed: 47 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -237,19 +237,21 @@ pub(super) struct Channel {
237237
cur_local_commitment_transaction_number: u64,
238238
cur_remote_commitment_transaction_number: u64,
239239
value_to_self_msat: u64, // Excluding all pending_htlcs, excluding fees
240-
/// Upon receipt of a channel_reestablish we have to figure out whether to send a
241-
/// revoke_and_ack first or a commitment update first. Generally, we prefer to send
242-
/// revoke_and_ack first, but if we had a pending commitment update of our own waiting on a
243-
/// remote revoke when we received the latest commitment update from the remote we have to make
244-
/// sure that commitment update gets resent first.
245-
received_commitment_while_awaiting_raa: bool,
246240
pending_inbound_htlcs: Vec<InboundHTLCOutput>,
247241
pending_outbound_htlcs: Vec<OutboundHTLCOutput>,
248242
holding_cell_htlc_updates: Vec<HTLCUpdateAwaitingACK>,
249243

244+
/// When resending CS/RAA messages on channel monitor restoration or on reconnect, we always
245+
/// need to ensure we resend them in the order we originally generated them. Note that because
246+
/// there can only ever be one in-flight CS and/or one in-flight RAA at any time, it is
247+
/// sufficient to simply set this to the opposite of any message we are generating as we
248+
/// generate it. ie when we generate a CS, we set this to RAAFirst as, if there is a pending
249+
/// in-flight RAA to resend, it will have been the first thing we generated, and thus we should
250+
/// send it first.
251+
resend_order: RAACommitmentOrder,
252+
250253
monitor_pending_revoke_and_ack: bool,
251254
monitor_pending_commitment_signed: bool,
252-
monitor_pending_order: Option<RAACommitmentOrder>,
253255
monitor_pending_forwards: Vec<(PendingForwardHTLCInfo, u64)>,
254256
monitor_pending_failures: Vec<(HTLCSource, PaymentHash, HTLCFailReason)>,
255257

@@ -450,7 +452,6 @@ impl Channel {
450452
cur_local_commitment_transaction_number: INITIAL_COMMITMENT_NUMBER,
451453
cur_remote_commitment_transaction_number: INITIAL_COMMITMENT_NUMBER,
452454
value_to_self_msat: channel_value_satoshis * 1000 - push_msat,
453-
received_commitment_while_awaiting_raa: false,
454455

455456
pending_inbound_htlcs: Vec::new(),
456457
pending_outbound_htlcs: Vec::new(),
@@ -461,9 +462,10 @@ impl Channel {
461462
next_remote_htlc_id: 0,
462463
channel_update_count: 1,
463464

465+
resend_order: RAACommitmentOrder::CommitmentFirst,
466+
464467
monitor_pending_revoke_and_ack: false,
465468
monitor_pending_commitment_signed: false,
466-
monitor_pending_order: None,
467469
monitor_pending_forwards: Vec::new(),
468470
monitor_pending_failures: Vec::new(),
469471

@@ -639,7 +641,6 @@ impl Channel {
639641
cur_local_commitment_transaction_number: INITIAL_COMMITMENT_NUMBER,
640642
cur_remote_commitment_transaction_number: INITIAL_COMMITMENT_NUMBER,
641643
value_to_self_msat: msg.push_msat,
642-
received_commitment_while_awaiting_raa: false,
643644

644645
pending_inbound_htlcs: Vec::new(),
645646
pending_outbound_htlcs: Vec::new(),
@@ -650,9 +651,10 @@ impl Channel {
650651
next_remote_htlc_id: 0,
651652
channel_update_count: 1,
652653

654+
resend_order: RAACommitmentOrder::CommitmentFirst,
655+
653656
monitor_pending_revoke_and_ack: false,
654657
monitor_pending_commitment_signed: false,
655-
monitor_pending_order: None,
656658
monitor_pending_forwards: Vec::new(),
657659
monitor_pending_failures: Vec::new(),
658660

@@ -1805,12 +1807,6 @@ impl Channel {
18051807
}
18061808
}
18071809

1808-
if self.channel_state & (ChannelState::MonitorUpdateFailed as u32) == 0 {
1809-
// This is a response to our post-monitor-failed unfreeze messages, so we can clear the
1810-
// monitor_pending_order requirement as we won't re-send the monitor_pending messages.
1811-
self.monitor_pending_order = None;
1812-
}
1813-
18141810
self.channel_monitor.provide_latest_local_commitment_tx_info(local_commitment_tx.0, local_keys, self.feerate_per_kw, htlcs_and_sigs);
18151811

18161812
for htlc in self.pending_inbound_htlcs.iter_mut() {
@@ -1833,14 +1829,13 @@ impl Channel {
18331829

18341830
self.cur_local_commitment_transaction_number -= 1;
18351831
self.last_local_commitment_txn = new_local_commitment_txn;
1836-
self.received_commitment_while_awaiting_raa = (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32)) != 0;
1832+
// Note that if we need_our_commitment & !AwaitingRemoteRevoke we'll call
1833+
// send_commitment_no_status_check() next which will reset this to RAAFirst.
1834+
self.resend_order = RAACommitmentOrder::CommitmentFirst;
18371835

18381836
if (self.channel_state & ChannelState::MonitorUpdateFailed as u32) != 0 {
18391837
// In case we initially failed monitor updating without requiring a response, we need
18401838
// to make sure the RAA gets sent first.
1841-
if !self.monitor_pending_commitment_signed {
1842-
self.monitor_pending_order = Some(RAACommitmentOrder::RevokeAndACKFirst);
1843-
}
18441839
self.monitor_pending_revoke_and_ack = true;
18451840
if need_our_commitment && (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32)) == 0 {
18461841
// If we were going to send a commitment_signed after the RAA, go ahead and do all
@@ -2014,12 +2009,6 @@ impl Channel {
20142009
self.their_prev_commitment_point = self.their_cur_commitment_point;
20152010
self.their_cur_commitment_point = Some(msg.next_per_commitment_point);
20162011
self.cur_remote_commitment_transaction_number -= 1;
2017-
self.received_commitment_while_awaiting_raa = false;
2018-
if self.channel_state & (ChannelState::MonitorUpdateFailed as u32) == 0 {
2019-
// This is a response to our post-monitor-failed unfreeze messages, so we can clear the
2020-
// monitor_pending_order requirement as we won't re-send the monitor_pending messages.
2021-
self.monitor_pending_order = None;
2022-
}
20232012

20242013
log_trace!(self, "Updating HTLCs on receipt of RAA...");
20252014
let mut to_forward_infos = Vec::new();
@@ -2137,7 +2126,7 @@ impl Channel {
21372126
// When the monitor updating is restored we'll call get_last_commitment_update(),
21382127
// which does not update state, but we're definitely now awaiting a remote revoke
21392128
// before we can step forward any more, so set it here.
2140-
self.channel_state |= ChannelState::AwaitingRemoteRevoke as u32;
2129+
self.send_commitment_no_status_check()?;
21412130
}
21422131
self.monitor_pending_forwards.append(&mut to_forward_infos);
21432132
self.monitor_pending_failures.append(&mut revoked_htlcs);
@@ -2285,15 +2274,13 @@ impl Channel {
22852274
/// Indicates that a ChannelMonitor update failed to be stored by the client and further
22862275
/// updates are partially paused.
22872276
/// This must be called immediately after the call which generated the ChannelMonitor update
2288-
/// which failed, with the order argument set to the type of call it represented (ie a
2289-
/// commitment update or a revoke_and_ack generation). The messages which were generated from
2290-
/// that original call must *not* have been sent to the remote end, and must instead have been
2291-
/// dropped. They will be regenerated when monitor_updating_restored is called.
2292-
pub fn monitor_update_failed(&mut self, order: RAACommitmentOrder, resend_raa: bool, resend_commitment: bool, mut pending_forwards: Vec<(PendingForwardHTLCInfo, u64)>, mut pending_fails: Vec<(HTLCSource, PaymentHash, HTLCFailReason)>) {
2277+
/// which failed. The messages which were generated from that call which generated the
2278+
/// monitor update failure must *not* have been sent to the remote end, and must instead
2279+
/// have been dropped. They will be regenerated when monitor_updating_restored is called.
2280+
pub fn monitor_update_failed(&mut self, resend_raa: bool, resend_commitment: bool, mut pending_forwards: Vec<(PendingForwardHTLCInfo, u64)>, mut pending_fails: Vec<(HTLCSource, PaymentHash, HTLCFailReason)>) {
22932281
assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, 0);
22942282
self.monitor_pending_revoke_and_ack = resend_raa;
22952283
self.monitor_pending_commitment_signed = resend_commitment;
2296-
self.monitor_pending_order = Some(order);
22972284
assert!(self.monitor_pending_forwards.is_empty());
22982285
mem::swap(&mut pending_forwards, &mut self.monitor_pending_forwards);
22992286
assert!(self.monitor_pending_failures.is_empty());
@@ -2314,7 +2301,6 @@ impl Channel {
23142301
mem::swap(&mut failures, &mut self.monitor_pending_failures);
23152302

23162303
if self.channel_state & (ChannelState::PeerDisconnected as u32) != 0 {
2317-
// Leave monitor_pending_order so we can order our channel_reestablish responses
23182304
self.monitor_pending_revoke_and_ack = false;
23192305
self.monitor_pending_commitment_signed = false;
23202306
return (None, None, RAACommitmentOrder::RevokeAndACKFirst, forwards, failures);
@@ -2329,7 +2315,12 @@ impl Channel {
23292315

23302316
self.monitor_pending_revoke_and_ack = false;
23312317
self.monitor_pending_commitment_signed = false;
2332-
(raa, commitment_update, self.monitor_pending_order.clone().unwrap(), forwards, failures)
2318+
let order = self.resend_order.clone();
2319+
log_trace!(self, "Restored monitor updating resulting in {} commitment update and {} RAA, with {} first",
2320+
if commitment_update.is_some() { "a" } else { "no" },
2321+
if raa.is_some() { "an" } else { "no" },
2322+
match order { RAACommitmentOrder::CommitmentFirst => "commitment", RAACommitmentOrder::RevokeAndACKFirst => "RAA"});
2323+
(raa, commitment_update, order, forwards, failures)
23332324
}
23342325

23352326
pub fn update_fee(&mut self, fee_estimator: &FeeEstimator, msg: &msgs::UpdateFee) -> Result<(), ChannelError> {
@@ -2487,33 +2478,26 @@ impl Channel {
24872478
})
24882479
} else { None };
24892480

2490-
let order = self.monitor_pending_order.clone().unwrap_or(if self.received_commitment_while_awaiting_raa {
2491-
RAACommitmentOrder::CommitmentFirst
2492-
} else {
2493-
RAACommitmentOrder::RevokeAndACKFirst
2494-
});
2495-
24962481
if msg.next_local_commitment_number == our_next_remote_commitment_number {
24972482
if required_revoke.is_some() {
24982483
log_debug!(self, "Reconnected channel {} with only lost outbound RAA", log_bytes!(self.channel_id()));
24992484
} else {
25002485
log_debug!(self, "Reconnected channel {} with no loss", log_bytes!(self.channel_id()));
25012486
}
25022487

2503-
if (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::MonitorUpdateFailed as u32)) == 0 &&
2504-
self.monitor_pending_order.is_none() { // monitor_pending_order indicates we're waiting on a response to a unfreeze
2488+
if (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::MonitorUpdateFailed as u32)) == 0 {
25052489
// We're up-to-date and not waiting on a remote revoke (if we are our
25062490
// channel_reestablish should result in them sending a revoke_and_ack), but we may
25072491
// have received some updates while we were disconnected. Free the holding cell
25082492
// now!
25092493
match self.free_holding_cell_htlcs() {
25102494
Err(ChannelError::Close(msg)) => return Err(ChannelError::Close(msg)),
25112495
Err(ChannelError::Ignore(_)) => panic!("Got non-channel-failing result from free_holding_cell_htlcs"),
2512-
Ok(Some((commitment_update, channel_monitor))) => return Ok((resend_funding_locked, required_revoke, Some(commitment_update), Some(channel_monitor), order, shutdown_msg)),
2513-
Ok(None) => return Ok((resend_funding_locked, required_revoke, None, None, order, shutdown_msg)),
2496+
Ok(Some((commitment_update, channel_monitor))) => return Ok((resend_funding_locked, required_revoke, Some(commitment_update), Some(channel_monitor), self.resend_order.clone(), shutdown_msg)),
2497+
Ok(None) => return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg)),
25142498
}
25152499
} else {
2516-
return Ok((resend_funding_locked, required_revoke, None, None, order, shutdown_msg));
2500+
return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg));
25172501
}
25182502
} else if msg.next_local_commitment_number == our_next_remote_commitment_number - 1 {
25192503
if required_revoke.is_some() {
@@ -2524,10 +2508,10 @@ impl Channel {
25242508

25252509
if self.channel_state & (ChannelState::MonitorUpdateFailed as u32) != 0 {
25262510
self.monitor_pending_commitment_signed = true;
2527-
return Ok((resend_funding_locked, None, None, None, order, shutdown_msg));
2511+
return Ok((resend_funding_locked, None, None, None, self.resend_order.clone(), shutdown_msg));
25282512
}
25292513

2530-
return Ok((resend_funding_locked, required_revoke, Some(self.get_last_commitment_update()), None, order, shutdown_msg));
2514+
return Ok((resend_funding_locked, required_revoke, Some(self.get_last_commitment_update()), None, self.resend_order.clone(), shutdown_msg));
25312515
} else {
25322516
return Err(ChannelError::Close("Peer attempted to reestablish channel with a very old remote commitment transaction"));
25332517
}
@@ -3348,6 +3332,7 @@ impl Channel {
33483332
htlc.state = OutboundHTLCState::AwaitingRemovedRemoteRevoke(fail_reason);
33493333
}
33503334
}
3335+
self.resend_order = RAACommitmentOrder::RevokeAndACKFirst;
33513336

33523337
let (res, remote_commitment_tx, htlcs) = match self.send_commitment_no_state_update() {
33533338
Ok((res, (remote_commitment_tx, mut htlcs))) => {
@@ -3558,8 +3543,6 @@ impl Writeable for Channel {
35583543
self.cur_remote_commitment_transaction_number.write(writer)?;
35593544
self.value_to_self_msat.write(writer)?;
35603545

3561-
self.received_commitment_while_awaiting_raa.write(writer)?;
3562-
35633546
let mut dropped_inbound_htlcs = 0;
35643547
for htlc in self.pending_inbound_htlcs.iter() {
35653548
if let InboundHTLCState::RemoteAnnounced(_) = htlc.state {
@@ -3659,13 +3642,13 @@ impl Writeable for Channel {
36593642
}
36603643
}
36613644

3645+
match self.resend_order {
3646+
RAACommitmentOrder::CommitmentFirst => 0u8.write(writer)?,
3647+
RAACommitmentOrder::RevokeAndACKFirst => 1u8.write(writer)?,
3648+
}
3649+
36623650
self.monitor_pending_revoke_and_ack.write(writer)?;
36633651
self.monitor_pending_commitment_signed.write(writer)?;
3664-
match self.monitor_pending_order {
3665-
None => 0u8.write(writer)?,
3666-
Some(RAACommitmentOrder::CommitmentFirst) => 1u8.write(writer)?,
3667-
Some(RAACommitmentOrder::RevokeAndACKFirst) => 2u8.write(writer)?,
3668-
}
36693652

36703653
(self.monitor_pending_forwards.len() as u64).write(writer)?;
36713654
for &(ref pending_forward, ref htlc_id) in self.monitor_pending_forwards.iter() {
@@ -3763,8 +3746,6 @@ impl<R : ::std::io::Read> ReadableArgs<R, Arc<Logger>> for Channel {
37633746
let cur_remote_commitment_transaction_number = Readable::read(reader)?;
37643747
let value_to_self_msat = Readable::read(reader)?;
37653748

3766-
let received_commitment_while_awaiting_raa = Readable::read(reader)?;
3767-
37683749
let pending_inbound_htlc_count: u64 = Readable::read(reader)?;
37693750
let mut pending_inbound_htlcs = Vec::with_capacity(cmp::min(pending_inbound_htlc_count as usize, OUR_MAX_HTLCS as usize));
37703751
for _ in 0..pending_inbound_htlc_count {
@@ -3827,16 +3808,15 @@ impl<R : ::std::io::Read> ReadableArgs<R, Arc<Logger>> for Channel {
38273808
});
38283809
}
38293810

3830-
let monitor_pending_revoke_and_ack = Readable::read(reader)?;
3831-
let monitor_pending_commitment_signed = Readable::read(reader)?;
3832-
3833-
let monitor_pending_order = match <u8 as Readable<R>>::read(reader)? {
3834-
0 => None,
3835-
1 => Some(RAACommitmentOrder::CommitmentFirst),
3836-
2 => Some(RAACommitmentOrder::RevokeAndACKFirst),
3811+
let resend_order = match <u8 as Readable<R>>::read(reader)? {
3812+
0 => RAACommitmentOrder::CommitmentFirst,
3813+
1 => RAACommitmentOrder::RevokeAndACKFirst,
38373814
_ => return Err(DecodeError::InvalidValue),
38383815
};
38393816

3817+
let monitor_pending_revoke_and_ack = Readable::read(reader)?;
3818+
let monitor_pending_commitment_signed = Readable::read(reader)?;
3819+
38403820
let monitor_pending_forwards_count: u64 = Readable::read(reader)?;
38413821
let mut monitor_pending_forwards = Vec::with_capacity(cmp::min(monitor_pending_forwards_count as usize, OUR_MAX_HTLCS as usize));
38423822
for _ in 0..monitor_pending_forwards_count {
@@ -3923,14 +3903,14 @@ impl<R : ::std::io::Read> ReadableArgs<R, Arc<Logger>> for Channel {
39233903
cur_remote_commitment_transaction_number,
39243904
value_to_self_msat,
39253905

3926-
received_commitment_while_awaiting_raa,
39273906
pending_inbound_htlcs,
39283907
pending_outbound_htlcs,
39293908
holding_cell_htlc_updates,
39303909

3910+
resend_order,
3911+
39313912
monitor_pending_revoke_and_ack,
39323913
monitor_pending_commitment_signed,
3933-
monitor_pending_order,
39343914
monitor_pending_forwards,
39353915
monitor_pending_failures,
39363916

src/ln/channelmanager.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ macro_rules! handle_monitor_err {
494494
if !$resend_raa {
495495
debug_assert!($action_type == RAACommitmentOrder::CommitmentFirst || !$resend_commitment);
496496
}
497-
$entry.get_mut().monitor_update_failed($action_type, $resend_raa, $resend_commitment, $failed_forwards, $failed_fails);
497+
$entry.get_mut().monitor_update_failed($resend_raa, $resend_commitment, $failed_forwards, $failed_fails);
498498
Err(MsgHandleErrInternal::from_chan_no_close(ChannelError::Ignore("Failed to update ChannelMonitor"), *$entry.key()))
499499
},
500500
}

0 commit comments

Comments
 (0)