Handle pre-startup and closed-channel monitor update completion actions #2391


Merged
15 changes: 11 additions & 4 deletions lightning/src/ln/chanmon_update_fail_tests.rs
@@ -2346,6 +2346,7 @@ fn do_channel_holding_cell_serialize(disconnect: bool, reload_a: bool) {
RecipientOnionFields::secret_only(payment_secret_2), PaymentId(payment_hash_2.0)).unwrap();
check_added_monitors!(nodes[0], 0);

let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode();
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
nodes[0].node.claim_funds(payment_preimage_0);
@@ -2365,8 +2366,9 @@ fn do_channel_holding_cell_serialize(disconnect: bool, reload_a: bool) {
// disconnect the peers. Note that the fuzzer originally found this issue because
// deserializing a ChannelManager in this state causes an assertion failure.
Comment on lines 2366 to 2367
Contributor

It isn't quite clear what issue the fuzzer had found before, and whether it would still find it after moving the initialization of chan_0_monitor_serialized.

Collaborator Author

So when the test was written we had no concept of in-flight monitor updates - we expected the user to (magically) call monitor_update_completed after startup on the monitor update that they'd previously marked in-flight. With the change here to finally actually "complete" monitor updates that completed while we were shut down, we have to do something to mark the monitor updates as actually-not-complete, which we do by loading with the old monitor.
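
To make that concrete, here is a minimal sketch of what "loading with the old monitor" means, using only names that appear in the diff below (an illustration of the pattern, not the full test):

```rust
// Serialize the ChannelMonitor *before* queueing the in-progress updates, so a
// later reload sees those updates as still in-flight rather than silently done.
let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode();
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
// ... the claim/send in the test generates monitor updates that stay pending ...
// On reload, hand back the *old* monitor encoding so the ChannelManager has to
// replay (and eventually complete) the updates it had marked in-flight.
reload_node!(nodes[0], &nodes[0].node.encode(), &[&chan_0_monitor_serialized],
	persister, new_chain_monitor, nodes_0_deserialized);
```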

if reload_a {
let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode();
reload_node!(nodes[0], &nodes[0].node.encode(), &[&chan_0_monitor_serialized], persister, new_chain_monitor, nodes_0_deserialized);
persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
} else {
nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id());
}
@@ -2406,9 +2408,14 @@
assert_eq!(pending_cs.commitment_signed, cs);
} else { panic!(); }

// There should be no monitor updates as we are still pending awaiting a failed one.
check_added_monitors!(nodes[0], 0);
check_added_monitors!(nodes[1], 0);
if reload_a {
// The two pending monitor updates were replayed (but are still pending).
check_added_monitors(&nodes[0], 2);
} else {
// There should be no monitor updates as we are still pending awaiting a failed one.
check_added_monitors(&nodes[0], 0);
}
check_added_monitors(&nodes[1], 0);
}

// If we finish updating the monitor, we should free the holding cell right away (this did
62 changes: 53 additions & 9 deletions lightning/src/ln/channelmanager.rs
@@ -530,6 +530,13 @@ enum BackgroundEvent {
funding_txo: OutPoint,
update: ChannelMonitorUpdate
},
/// Some [`ChannelMonitorUpdate`] (s) completed before we were serialized but we still have
/// them marked pending, thus we need to run any [`MonitorUpdateCompletionAction`] (s) pending
/// on a channel.
MonitorUpdatesComplete {
counterparty_node_id: PublicKey,
channel_id: [u8; 32],
},
}

#[derive(Debug)]
@@ -4191,6 +4198,22 @@ where
}
let _ = handle_error!(self, res, counterparty_node_id);
},
BackgroundEvent::MonitorUpdatesComplete { counterparty_node_id, channel_id } => {
let per_peer_state = self.per_peer_state.read().unwrap();
if let Some(peer_state_mutex) = per_peer_state.get(&counterparty_node_id) {
let mut peer_state_lock = peer_state_mutex.lock().unwrap();
let peer_state = &mut *peer_state_lock;
if let Some(chan) = peer_state.channel_by_id.get_mut(&channel_id) {
handle_monitor_update_completion!(self, peer_state_lock, peer_state, per_peer_state, chan);
} else {
let update_actions = peer_state.monitor_update_blocked_actions
.remove(&channel_id).unwrap_or(Vec::new());
mem::drop(peer_state_lock);
mem::drop(per_peer_state);
self.handle_monitor_update_completion_actions(update_actions);
}
}
},
}
}
NotifyOption::DoPersist
@@ -5001,24 +5024,29 @@ where
if peer_state_mutex_opt.is_none() { return }
peer_state_lock = peer_state_mutex_opt.unwrap().lock().unwrap();
let peer_state = &mut *peer_state_lock;
let mut channel = {
match peer_state.channel_by_id.entry(funding_txo.to_channel_id()){
hash_map::Entry::Occupied(chan) => chan,
hash_map::Entry::Vacant(_) => return,
}
};
let channel =
if let Some(chan) = peer_state.channel_by_id.get_mut(&funding_txo.to_channel_id()) {
chan
} else {
let update_actions = peer_state.monitor_update_blocked_actions
.remove(&funding_txo.to_channel_id()).unwrap_or(Vec::new());
mem::drop(peer_state_lock);
mem::drop(per_peer_state);
self.handle_monitor_update_completion_actions(update_actions);
return;
};
let remaining_in_flight =
if let Some(pending) = peer_state.in_flight_monitor_updates.get_mut(funding_txo) {
pending.retain(|upd| upd.update_id > highest_applied_update_id);
pending.len()
} else { 0 };
log_trace!(self.logger, "ChannelMonitor updated to {}. Current highest is {}. {} pending in-flight updates.",
highest_applied_update_id, channel.get().context.get_latest_monitor_update_id(),
highest_applied_update_id, channel.context.get_latest_monitor_update_id(),
remaining_in_flight);
if !channel.get().is_awaiting_monitor_update() || channel.get().context.get_latest_monitor_update_id() != highest_applied_update_id {
if !channel.is_awaiting_monitor_update() || channel.context.get_latest_monitor_update_id() != highest_applied_update_id {
return;
}
handle_monitor_update_completion!(self, peer_state_lock, peer_state, per_peer_state, channel.get_mut());
handle_monitor_update_completion!(self, peer_state_lock, peer_state, per_peer_state, channel);
}

/// Accepts a request to open a channel after a [`Event::OpenChannelRequest`].
@@ -8513,6 +8541,16 @@ where
update: update.clone(),
});
}
if $chan_in_flight_upds.is_empty() {
Contributor

Should we check if any updates were actually removed before generating the background event?

Collaborator Author

We only get here if there were some updates in the in_flight_monitor_updates map, which is only filled in with updates for channels with in-flight updates during serialization.
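
As an illustration of that invariant, here is a hypothetical helper (not the actual ChannelManager code) capturing the decision this block makes: updates the persisted ChannelMonitor has already applied are dropped, and only if none remain is the new MonitorUpdatesComplete background event scheduled:

```rust
// `in_flight_update_ids` stands in for the in-flight updates serialized for one
// channel; `monitor_latest_update_id` for the update id the persisted
// ChannelMonitor reports as already applied.
fn only_completion_actions_needed(
	in_flight_update_ids: &mut Vec<u64>, monitor_latest_update_id: u64,
) -> bool {
	// Drop updates the monitor already applied before we were serialized.
	in_flight_update_ids.retain(|id| *id > monitor_latest_update_id);
	// If nothing is left there is nothing to replay; we only need to run any
	// pending MonitorUpdateCompletionActions (via MonitorUpdatesComplete).
	in_flight_update_ids.is_empty()
}
```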

// We had some updates to apply, but it turns out they had completed before we
// were serialized, we just weren't notified of that. Thus, we may have to run
// the completion actions for any monitor updates, but otherwise are done.
pending_background_events.push(
BackgroundEvent::MonitorUpdatesComplete {
counterparty_node_id: $counterparty_node_id,
channel_id: $funding_txo.to_channel_id(),
});
}
if $peer_state.in_flight_monitor_updates.insert($funding_txo, $chan_in_flight_upds).is_some() {
log_error!(args.logger, "Duplicate in-flight monitor update set for the same channel!");
return Err(DecodeError::InvalidValue);
@@ -8905,6 +8943,12 @@ where
blocked_peer_state.lock().unwrap().actions_blocking_raa_monitor_updates
.entry(blocked_channel_outpoint.to_channel_id())
.or_insert_with(Vec::new).push(blocking_action.clone());
} else {
// If the channel we were blocking has closed, we don't need to
// worry about it - the blocked monitor update should never have
// been released from the `Channel` object so it can't have
// completed, and if the channel closed there's no reason to bother
// anymore.
}
}
}