Commit 60d83ef
Free holding cell on monitor-updating-restored when there's no upd
If there are no pending channel update messages when monitor updating is restored (though there may be an RAA to send), and we're connected to our peer and not awaiting a remote RAA, we need to free anything in our holding cell.

Without this, chanmon_fail_consistency was able to find a stuck condition where we sit on an HTLC failure in our holding cell and never handle it (at least until we have other actions to take which empty the holding cell).

Still, this approach sucks - it introduces reentrancy in a particularly dangerous form:
a) we re-enter user code around monitor updates while being called from user code around monitor updates, making deadlocks very likely (in fact, our current tests have a bug here!),
b) the re-entrancy only occurs in a very rare case, making it likely users will not hit it in testing, only deadlocking in production.

I'm not entirely sure what the alternative is, however - we could move to a world where we poll for holding cell events that can be freed on our 1-minute timer, but we'd still have a super rare reentrancy case, just in timer_chan_freshness_every_min() instead.
1 parent 4a838d7 · commit 60d83ef
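To make the deadlock concern above concrete, here is a minimal, self-contained sketch of the hazard's shape. The types below (MonitorStore, Manager roles folded into one struct) are hypothetical stand-ins, not the rust-lightning API: a persister that still holds its lock while signalling monitor-update completion can now be re-entered by that very completion call when the restore path produces a fresh holding-cell ChannelMonitorUpdate.

// Hypothetical stand-ins for illustration only - not the rust-lightning API.
use std::cell::Cell;
use std::sync::Mutex;

struct MonitorStore {
    lock: Mutex<()>,           // guards persisted monitor state
    needs_restore: Cell<bool>, // a previous update "temporarily failed"
}

impl MonitorStore {
    // Plays the role of the user's chain::Watch::update_channel implementation.
    fn apply_update(&self) {
        let _guard = self.lock.lock().unwrap(); // the re-entrant call never gets past here
        // ... persist the update to disk ...
        if self.needs_restore.replace(false) {
            // Signalling completion while still holding `lock` is the dangerous part:
            // restoring may free the holding cell, producing another update that is
            // handed straight back to apply_update() on this same thread.
            self.monitor_updated();
        }
    }

    // Plays the role of ChannelManager::channel_monitor_updated().
    fn monitor_updated(&self) {
        self.apply_update(); // re-enters user code around monitor updates
    }
}

fn main() {
    let store = MonitorStore { lock: Mutex::new(()), needs_restore: Cell::new(true) };
    store.apply_update(); // deadlocks (or panics) on the re-entrant lock()
}

The alternative discussed in the message, draining the holding cell from the 1-minute timer instead, only moves this call chain into timer_chan_freshness_every_min(); it does not remove it.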

File tree

2 files changed (+53, -17 lines)
lightning/src/ln/channel.rs

Lines changed: 22 additions & 5 deletions
@@ -2605,7 +2605,10 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 	/// Indicates that the latest ChannelMonitor update has been committed by the client
 	/// successfully and we should restore normal operation. Returns messages which should be sent
 	/// to the remote side.
-	pub fn monitor_updating_restored<L: Deref>(&mut self, logger: &L) -> (Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, RAACommitmentOrder, Vec<(PendingHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, bool, Option<msgs::FundingLocked>) where L::Target: Logger {
+	pub fn monitor_updating_restored<L: Deref>(&mut self, logger: &L) -> (
+		Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, RAACommitmentOrder, Option<ChannelMonitorUpdate>,
+		Vec<(PendingHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, Vec<(HTLCSource, PaymentHash)>,
+		bool, Option<msgs::FundingLocked>) where L::Target: Logger {
 		assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, ChannelState::MonitorUpdateFailed as u32);
 		self.channel_state &= !(ChannelState::MonitorUpdateFailed as u32);
 
@@ -2635,25 +2638,39 @@ impl<ChanSigner: ChannelKeys> Channel<ChanSigner> {
 		if self.channel_state & (ChannelState::PeerDisconnected as u32) != 0 {
 			self.monitor_pending_revoke_and_ack = false;
 			self.monitor_pending_commitment_signed = false;
-			return (None, None, RAACommitmentOrder::RevokeAndACKFirst, forwards, failures, needs_broadcast_safe, funding_locked);
+			return (None, None, RAACommitmentOrder::RevokeAndACKFirst, None, forwards, failures, Vec::new(), needs_broadcast_safe, funding_locked);
 		}
 
 		let raa = if self.monitor_pending_revoke_and_ack {
 			Some(self.get_last_revoke_and_ack())
 		} else { None };
-		let commitment_update = if self.monitor_pending_commitment_signed {
+		let mut commitment_update = if self.monitor_pending_commitment_signed {
 			Some(self.get_last_commitment_update(logger))
 		} else { None };
 
+		let mut order = self.resend_order.clone();
 		self.monitor_pending_revoke_and_ack = false;
 		self.monitor_pending_commitment_signed = false;
-		let order = self.resend_order.clone();
+
+		let mut htlcs_failed_to_forward = Vec::new();
+		let mut chanmon_update = None;
+		if commitment_update.is_none() && self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32) == 0 {
+			order = RAACommitmentOrder::RevokeAndACKFirst;
+
+			let (update_opt, mut failed_htlcs) = self.free_holding_cell_htlcs(logger).unwrap();
+			htlcs_failed_to_forward.append(&mut failed_htlcs);
+			if let Some((com_update, mon_update)) = update_opt {
+				commitment_update = Some(com_update);
+				chanmon_update = Some(mon_update);
+			}
+		}
+
 		log_trace!(logger, "Restored monitor updating resulting in {}{} commitment update and {} RAA, with {} first",
 			if needs_broadcast_safe { "a funding broadcast safe, " } else { "" },
 			if commitment_update.is_some() { "a" } else { "no" },
 			if raa.is_some() { "an" } else { "no" },
 			match order { RAACommitmentOrder::CommitmentFirst => "commitment", RAACommitmentOrder::RevokeAndACKFirst => "RAA"});
-		(raa, commitment_update, order, forwards, failures, needs_broadcast_safe, funding_locked)
+		(raa, commitment_update, order, chanmon_update, forwards, failures, htlcs_failed_to_forward, needs_broadcast_safe, funding_locked)
 	}
 
 	pub fn update_fee<F: Deref>(&mut self, fee_estimator: &F, msg: &msgs::UpdateFee) -> Result<(), ChannelError>
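As a plain-Rust summary of when the new branch in monitor_updating_restored() fires, here is a self-contained sketch with booleans standing in for the ChannelState bit tests; the helper name is invented for illustration, not part of the codebase.

// Sketch only: booleans stand in for the ChannelState bit checks above.
fn should_free_holding_cell(
    peer_disconnected: bool,      // ChannelState::PeerDisconnected is set
    resending_commitment: bool,   // a commitment_update is already being resent
    awaiting_remote_revoke: bool, // ChannelState::AwaitingRemoteRevoke is set
) -> bool {
    // The disconnected case returns early above; otherwise the holding cell is
    // freed only when there is no commitment update to resend and we are not
    // waiting on the counterparty's revoke_and_ack.
    !peer_disconnected && !resending_commitment && !awaiting_remote_revoke
}

fn main() {
    // The stuck case chanmon_fail_consistency found: connected, nothing to resend,
    // not awaiting an RAA, but failures sitting in the holding cell.
    assert!(should_free_holding_cell(false, false, false));
    // Still awaiting the remote RAA: leave the holding cell alone.
    assert!(!should_free_holding_cell(false, false, true));
}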

lightning/src/ln/channelmanager.rs

Lines changed: 31 additions & 12 deletions
@@ -710,20 +710,30 @@ macro_rules! maybe_break_monitor_err {
 }
 
 macro_rules! handle_chan_restoration_locked {
-	($self: expr, $channel_lock: expr, $channel_state: expr, $channel_entry: expr,
-	 $raa: expr, $commitment_update: expr, $order: expr,
+	($self: ident, $channel_lock: expr, $channel_state: expr, $channel_entry: expr,
+	 $raa: expr, $commitment_update: expr, $order: expr, $chanmon_update: expr,
 	 $pending_forwards: expr, $broadcast_safe: expr, $funding_locked: expr) => { {
 		let mut htlc_forwards = None;
 		let mut funding_broadcast_safe = None;
 		let counterparty_node_id = $channel_entry.get().get_counterparty_node_id();
+		let channel_id = $channel_entry.get().channel_id();
 
-		{
+		let res = loop {
 			if !$pending_forwards.is_empty() {
 				htlc_forwards = Some(($channel_entry.get().get_short_channel_id().expect("We can't have pending forwards before funding confirmation"),
 					$channel_entry.get().get_funding_txo().unwrap(), $pending_forwards));
 			}
 
 			macro_rules! handle_cs { () => {
+				if let Some(monitor_update) = $chanmon_update {
+					assert!($order == RAACommitmentOrder::RevokeAndACKFirst);
+					assert!(!$broadcast_safe);
+					assert!($funding_locked.is_none());
+					assert!($commitment_update.is_some());
+					if let Err(e) = $self.chain_monitor.update_channel($channel_entry.get().get_funding_txo().unwrap(), monitor_update) {
+						break handle_monitor_err!($self, e, $channel_state, $channel_entry, RAACommitmentOrder::CommitmentFirst, false, true);
+					}
+				}
 				if let Some(update) = $commitment_update {
 					$channel_state.pending_msg_events.push(events::MessageSendEvent::UpdateHTLCs {
 						node_id: counterparty_node_id,
@@ -766,21 +776,26 @@ macro_rules! handle_chan_restoration_locked {
 						msg: announcement_sigs,
 					});
 				}
-				$channel_state.short_to_id.insert($channel_entry.get().get_short_channel_id().unwrap(), $channel_entry.get().channel_id());
+				$channel_state.short_to_id.insert($channel_entry.get().get_short_channel_id().unwrap(), channel_id);
 			}
-		}
-		(htlc_forwards, funding_broadcast_safe)
+			break Ok(());
+		};
+
+		(htlc_forwards, funding_broadcast_safe, res, channel_id, counterparty_node_id)
 	} }
 }
 
 macro_rules! post_handle_chan_restoration {
-	($self: expr, $locked_res: expr, $pending_failures: expr) => { {
-		let (htlc_forwards, funding_broadcast_safe) = $locked_res;
+	($self: ident, $locked_res: expr, $pending_failures: expr, $forwarding_failures: expr) => { {
+		let (htlc_forwards, funding_broadcast_safe, res, channel_id, counterparty_node_id) = $locked_res;
+
+		let _ = handle_error!($self, res, counterparty_node_id);
 
 		if let Some(ev) = funding_broadcast_safe {
 			$self.pending_events.lock().unwrap().push(ev);
 		}
 
+		$self.fail_holding_cell_htlcs($forwarding_failures, channel_id);
 		for failure in $pending_failures.drain(..) {
 			$self.fail_htlc_backwards_internal($self.channel_state.lock().unwrap(), failure.0, &failure.1, failure.2);
 		}
@@ -2280,6 +2295,10 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 	/// ChannelMonitorUpdateErr::TemporaryFailures is fine. The highest_applied_update_id field
 	/// exists largely only to prevent races between this and concurrent update_monitor calls.
 	///
+	/// XXX: Update to note re-entrancy - this is really terrible - the reentrancy only happens in
+	/// a really rare case making it incredibly likely users will miss it and never hit it in
+	/// testing.
+	///
 	/// Thus, the anticipated use is, at a high level:
 	/// 1) You register a chain::Watch with this ChannelManager,
 	/// 2) it stores each update to disk, and begins updating any remote (eg watchtower) copies of
@@ -2291,7 +2310,7 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 	pub fn channel_monitor_updated(&self, funding_txo: &OutPoint, highest_applied_update_id: u64) {
 		let _consistency_lock = self.total_consistency_lock.read().unwrap();
 
-		let (mut pending_failures, chan_restoration_res) = {
+		let (mut pending_failures, forwarding_failds, chan_restoration_res) = {
			let mut channel_lock = self.channel_state.lock().unwrap();
			let channel_state = &mut *channel_lock;
			let mut channel = match channel_state.by_id.entry(funding_txo.to_channel_id()) {
@@ -2302,10 +2321,10 @@ impl<ChanSigner: ChannelKeys, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 				return;
 			}
 
-			let (raa, commitment_update, order, pending_forwards, pending_failures, needs_broadcast_safe, funding_locked) = channel.get_mut().monitor_updating_restored(&self.logger);
-			(pending_failures, handle_chan_restoration_locked!(self, channel_lock, channel_state, channel, raa, commitment_update, order, pending_forwards, needs_broadcast_safe, funding_locked))
+			let (raa, commitment_update, order, chanmon_update, pending_forwards, pending_failures, forwarding_failds, needs_broadcast_safe, funding_locked) = channel.get_mut().monitor_updating_restored(&self.logger);
+			(pending_failures, forwarding_failds, handle_chan_restoration_locked!(self, channel_lock, channel_state, channel, raa, commitment_update, order, chanmon_update, pending_forwards, needs_broadcast_safe, funding_locked))
 		};
-		post_handle_chan_restoration!(self, chan_restoration_res, pending_failures);
+		post_handle_chan_restoration!(self, chan_restoration_res, pending_failures, forwarding_failds);
 	}
 
 	fn internal_open_channel(&self, counterparty_node_id: &PublicKey, their_features: InitFeatures, msg: &msgs::OpenChannel) -> Result<(), MsgHandleErrInternal> {
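Given the doc-comment warning added above, the calling pattern on the user side looks roughly like the following. This is a hedged sketch with stand-in types (OutPoint and ChannelManagerHandle here are placeholders, not the real lightning types); the one real constraint it illustrates is that channel_monitor_updated() may now call back into chain::Watch::update_channel(), so no persister or watch lock should be held when invoking it.

// Stand-in types for illustration; the real ones live in the lightning crate.
struct OutPoint;             // placeholder for chain::transaction::OutPoint
struct ChannelManagerHandle; // placeholder for the user's ChannelManager

impl ChannelManagerHandle {
    fn channel_monitor_updated(&self, _funding_txo: &OutPoint, _highest_applied_update_id: u64) {
        // In the real API this restores the channel and, after this commit, may
        // immediately push the freed holding cell back through chain::Watch.
    }
}

// Called once the deferred monitor write has actually completed.
fn on_persistence_complete(manager: &ChannelManagerHandle, funding_txo: OutPoint, update_id: u64) {
    // Any locks taken by the chain::Watch / persister must be released before this call.
    manager.channel_monitor_updated(&funding_txo, update_id);
}

fn main() {
    let manager = ChannelManagerHandle;
    on_persistence_complete(&manager, OutPoint, 1);
}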
