Commit 9f069c0

Block the mon update removing a preimage until upstream mon writes
When we forward a payment and receive an `update_fulfill_htlc` message from the downstream channel, we immediately claim the HTLC on the upstream channel, before even doing a `commitment_signed` dance on the downstream channel. This implies that our `ChannelMonitorUpdate`s "go out" in the right order - first we ensure we'll get our money by writing the preimage down, then we write the update that resolves giving money to the downstream node.

This is safe as long as `ChannelMonitorUpdate`s complete in the order in which they are generated, but of course looking forward we want to support asynchronous updates, which may complete in any order.

Thus, here, we enforce the correct ordering by blocking the downstream `ChannelMonitorUpdate` until the upstream one completes. Like the `PaymentSent` event handling, we do so only for the `revoke_and_ack` `ChannelMonitorUpdate`, ensuring the preimage-containing upstream update has a full RTT to complete before we actually manage to slow anything down.
1 parent e37a400 commit 9f069c0
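
To make the ordering concern concrete, here is a minimal, self-contained sketch of the idea; the `MonitorUpdate`, `PendingUpdate`, and `Forwarder` types below are hypothetical stand-ins, not LDK's actual API. A forwarding node queues two monitor updates, and the downstream `revoke_and_ack` update stays held until the upstream, preimage-bearing update has been durably persisted.

// A deliberately simplified model: not LDK's API, just the ordering rule it enforces.
#[derive(Debug)]
#[allow(dead_code)]
enum MonitorUpdate {
    // Upstream (A <-> B) update that writes the payment preimage to disk.
    ClaimUpstreamHtlc { preimage: [u8; 32] },
    // Downstream (B <-> C) RAA update that finalizes handing the HTLC value downstream.
    DownstreamRevokeAndAck,
}

#[derive(Debug)]
struct PendingUpdate {
    update: MonitorUpdate,
    // When true, this update must not be persisted until the upstream claim completes.
    blocked_on_upstream_claim: bool,
}

struct Forwarder {
    upstream_claim_complete: bool,
    queue: Vec<PendingUpdate>,
}

impl Forwarder {
    // Only release updates whose prerequisite (the upstream preimage write) is durable.
    fn releasable(&self) -> Vec<&PendingUpdate> {
        self.queue.iter()
            .filter(|p| !p.blocked_on_upstream_claim || self.upstream_claim_complete)
            .collect()
    }
}

fn main() {
    let mut node_b = Forwarder { upstream_claim_complete: false, queue: Vec::new() };
    node_b.queue.push(PendingUpdate {
        update: MonitorUpdate::ClaimUpstreamHtlc { preimage: [0; 32] },
        blocked_on_upstream_claim: false,
    });
    node_b.queue.push(PendingUpdate {
        update: MonitorUpdate::DownstreamRevokeAndAck,
        blocked_on_upstream_claim: true,
    });

    // With asynchronous persistence the two updates could complete in either order, so
    // the downstream RAA update is held back until the upstream claim is written.
    assert_eq!(node_b.releasable().len(), 1);
    node_b.upstream_claim_complete = true;
    assert_eq!(node_b.releasable().len(), 2);
    println!("released: {:?}", node_b.releasable());
}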

File tree: 3 files changed, +194 −28 lines changed


lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 140 additions & 6 deletions
@@ -3038,8 +3038,8 @@ fn test_blocked_chan_preimage_release() {
 	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
 	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
 
-	create_announced_chan_between_nodes(&nodes, 0, 1).2;
-	create_announced_chan_between_nodes(&nodes, 1, 2).2;
+	create_announced_chan_between_nodes(&nodes, 0, 1);
+	let chan_id_2 = create_announced_chan_between_nodes(&nodes, 1, 2).2;
 
 	send_payment(&nodes[0], &[&nodes[1], &nodes[2]], 5_000_000);
 
@@ -3068,20 +3068,29 @@ fn test_blocked_chan_preimage_release() {
 	let as_htlc_fulfill_updates = get_htlc_update_msgs!(nodes[0], nodes[1].node.get_our_node_id());
 	nodes[1].node.handle_update_fulfill_htlc(&nodes[0].node.get_our_node_id(), &as_htlc_fulfill_updates.update_fulfill_htlcs[0]);
 	check_added_monitors(&nodes[1], 1); // We generate only a preimage monitor update
+	assert!(get_monitor!(nodes[1], chan_id_2).get_stored_preimages().contains_key(&payment_hash_2));
 	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
 
-	// Finish the CS dance between nodes[0] and nodes[1].
-	do_commitment_signed_dance(&nodes[1], &nodes[0], &as_htlc_fulfill_updates.commitment_signed, false, false);
+	// Finish the CS dance between nodes[0] and nodes[1]. Note that until the event handling, the
+	// update_fulfill_htlc + CS is held, even though the preimage is already on disk for the
+	// channel.
+	nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_htlc_fulfill_updates.commitment_signed);
+	check_added_monitors(&nodes[1], 1);
+	let (a, raa) = do_main_commitment_signed_dance(&nodes[1], &nodes[0], false);
+	assert!(a.is_none());
+
+	nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &raa);
 	check_added_monitors(&nodes[1], 0);
+	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
 
 	let events = nodes[1].node.get_and_clear_pending_events();
 	assert_eq!(events.len(), 3);
 	if let Event::PaymentSent { .. } = events[0] {} else { panic!(); }
 	if let Event::PaymentPathSuccessful { .. } = events[2] {} else { panic!(); }
 	if let Event::PaymentForwarded { .. } = events[1] {} else { panic!(); }
 
-	// The event processing should release the last RAA update.
-	check_added_monitors(&nodes[1], 1);
+	// The event processing should release the last RAA updates on both channels.
+	check_added_monitors(&nodes[1], 2);
 
 	// When we fetch the next update the message getter will generate the next update for nodes[2],
 	// generating a further monitor update.
@@ -3092,3 +3101,128 @@
 	do_commitment_signed_dance(&nodes[2], &nodes[1], &bs_htlc_fulfill_updates.commitment_signed, false, false);
 	expect_payment_sent(&nodes[2], payment_preimage_2, None, true, true);
 }
+
+fn do_test_inverted_mon_completion_order(complete_bc_commitment_dance: bool) {
+	// When we forward a payment and receive an `update_fulfill_htlc` message from the downstream
+	// channel, we immediately claim the HTLC on the upstream channel, before even doing a
+	// `commitment_signed` dance on the downstream channel. This implies that our
+	// `ChannelMonitorUpdate`s are generated in the right order - first we ensure we'll get our
+	// money, then we write the update that resolves giving money on the downstream node. This is
+	// safe as long as `ChannelMonitorUpdate`s complete in the order in which they are generated,
+	// but of course this may not be the case. For asynchronous update writes, we have to ensure
+	// monitor updates can block each other, preventing the inversion all together.
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+
+	let persister;
+	let new_chain_monitor;
+	let nodes_1_deserialized;
+
+	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
+	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	let chan_id_ab = create_announced_chan_between_nodes(&nodes, 0, 1).2;
+	let chan_id_bc = create_announced_chan_between_nodes(&nodes, 1, 2).2;
+
+	// Route a payment from A, through B, to C, then claim it on C. Once we pass B the
+	// `update_fulfill_htlc` we have a monitor update for both of B's channels. We complete the one
+	// on the B<->C channel but leave the A<->B monitor update pending, then reload B.
+	let (payment_preimage, payment_hash, _) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 100_000);
+
+	let mon_ab = get_monitor!(nodes[1], chan_id_ab).encode();
+
+	nodes[2].node.claim_funds(payment_preimage);
+	check_added_monitors(&nodes[2], 1);
+	expect_payment_claimed!(nodes[2], payment_hash, 100_000);
+
+	chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+	let cs_updates = get_htlc_update_msgs(&nodes[2], &nodes[1].node.get_our_node_id());
+	nodes[1].node.handle_update_fulfill_htlc(&nodes[2].node.get_our_node_id(), &cs_updates.update_fulfill_htlcs[0]);
+
+	// B generates a new monitor update for the A <-> B channel, but doesn't send the new messages
+	// for it since the monitor update is marked in-progress.
+	check_added_monitors(&nodes[1], 1);
+	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+
+	// Now step the Commitment Signed Dance between B and C forward a bit (or fully), ensuring we
+	// won't get the preimage when the nodes reconnect and we have to get it from the
+	// ChannelMonitor.
+	nodes[1].node.handle_commitment_signed(&nodes[2].node.get_our_node_id(), &cs_updates.commitment_signed);
+	check_added_monitors(&nodes[1], 1);
+	if complete_bc_commitment_dance {
+		let (bs_revoke_and_ack, bs_commitment_signed) = get_revoke_commit_msgs!(nodes[1], nodes[2].node.get_our_node_id());
+		nodes[2].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_revoke_and_ack);
+		check_added_monitors(&nodes[2], 1);
+		nodes[2].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_commitment_signed);
+		check_added_monitors(&nodes[2], 1);
+		let cs_raa = get_event_msg!(nodes[2], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id());
+
+		// At this point node B still hasn't persisted the `ChannelMonitorUpdate` with the
+		// preimage in the A <-> B channel, which will prevent it from persisting the
+		// `ChannelMonitorUpdate` for the B<->C channel here to avoid "losing" the preimage.
+		nodes[1].node.handle_revoke_and_ack(&nodes[2].node.get_our_node_id(), &cs_raa);
+		check_added_monitors(&nodes[1], 0);
+		assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+	}
+
+	// Now reload node B
+	let manager_b = nodes[1].node.encode();
+
+	let mon_bc = get_monitor!(nodes[1], chan_id_bc).encode();
+	reload_node!(nodes[1], &manager_b, &[&mon_ab, &mon_bc], persister, new_chain_monitor, nodes_1_deserialized);
+
+	nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id());
+	nodes[2].node.peer_disconnected(&nodes[1].node.get_our_node_id());
+
+	// If we used the latest ChannelManager to reload from, we should have both channels still
+	// live. The B <-> C channel's final RAA ChannelMonitorUpdate must still be blocked as
+	// before - the ChannelMonitorUpdate for the A <-> B channel hasn't completed.
+	// When we call `timer_tick_occurred` we will get that monitor update back, which we'll
+	// complete after reconnecting to our peers.
+	persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+	nodes[1].node.timer_tick_occurred();
+	check_added_monitors(&nodes[1], 1);
+	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+
+	// Now reconnect B to both A and C. If the B <-> C commitment signed dance wasn't run to
+	// the end go ahead and do that, though the
+	// `pending_responding_commitment_signed_dup_monitor` in `reconnect_args` indicates that we
+	// expect to *not* receive the final RAA ChannelMonitorUpdate.
+	if complete_bc_commitment_dance {
+		reconnect_nodes(ReconnectArgs::new(&nodes[1], &nodes[2]));
+	} else {
+		let mut reconnect_args = ReconnectArgs::new(&nodes[1], &nodes[2]);
+		reconnect_args.pending_responding_commitment_signed.1 = true;
+		reconnect_args.pending_responding_commitment_signed_dup_monitor.1 = true;
+		reconnect_args.pending_raa = (false, true);
+		reconnect_nodes(reconnect_args);
+	}
+
+	reconnect_nodes(ReconnectArgs::new(&nodes[0], &nodes[1]));
+
+	// (Finally) complete the A <-> B ChannelMonitorUpdate, ensuring the preimage is durably on
+	// disk in the proper ChannelMonitor, unblocking the B <-> C ChannelMonitor updating
+	// process.
+	let (outpoint, _, ab_update_id) = nodes[1].chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id_ab).unwrap().clone();
+	nodes[1].chain_monitor.chain_monitor.channel_monitor_updated(outpoint, ab_update_id).unwrap();
+
+	// When we fetch B's HTLC update messages here (now that the ChannelMonitorUpdate has
+	// completed), it will also release the final RAA ChannelMonitorUpdate on the B <-> C
+	// channel.
+	let bs_updates = get_htlc_update_msgs(&nodes[1], &nodes[0].node.get_our_node_id());
+	check_added_monitors(&nodes[1], 1);
+
+	nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.update_fulfill_htlcs[0]);
+	do_commitment_signed_dance(&nodes[0], &nodes[1], &bs_updates.commitment_signed, false, false);
+
+	expect_payment_forwarded!(nodes[1], &nodes[0], &nodes[2], Some(1_000), false, false);
+
+	// Finally, check that the payment was, ultimately, seen as sent by node A.
+	expect_payment_sent(&nodes[0], payment_preimage, None, true, true);
+}
+
+#[test]
+fn test_inverted_mon_completion_order() {
+	do_test_inverted_mon_completion_order(true);
+	do_test_inverted_mon_completion_order(false);
+}

lightning/src/ln/channelmanager.rs

Lines changed: 53 additions & 8 deletions
@@ -656,7 +656,6 @@ pub(crate) enum RAAMonitorUpdateBlockingAction {
 }
 
 impl RAAMonitorUpdateBlockingAction {
-	#[allow(unused)]
 	fn from_prev_hop_data(prev_hop: &HTLCPreviousHopData) -> Self {
 		Self::ForwardedPaymentInboundClaim {
 			channel_id: prev_hop.outpoint.to_channel_id(),
@@ -5175,11 +5174,17 @@
 		self.pending_outbound_payments.finalize_claims(sources, &self.pending_events);
 	}
 
-	fn claim_funds_internal(&self, source: HTLCSource, payment_preimage: PaymentPreimage, forwarded_htlc_value_msat: Option<u64>, from_onchain: bool, next_channel_outpoint: OutPoint) {
+	fn claim_funds_internal(&self, source: HTLCSource, payment_preimage: PaymentPreimage,
+		forwarded_htlc_value_msat: Option<u64>, from_onchain: bool,
+		next_channel_counterparty_node_id: Option<PublicKey>, next_channel_outpoint: OutPoint
+	) {
 		match source {
 			HTLCSource::OutboundRoute { session_priv, payment_id, path, .. } => {
 				debug_assert!(self.background_events_processed_since_startup.load(Ordering::Acquire),
 					"We don't support claim_htlc claims during startup - monitors may not be available yet");
+				if let Some(pubkey) = next_channel_counterparty_node_id {
+					debug_assert_eq!(pubkey, path.hops[0].pubkey);
+				}
 				let ev_completion_action = EventCompletionAction::ReleaseRAAChannelMonitorUpdate {
 					channel_funding_outpoint: next_channel_outpoint,
 					counterparty_node_id: path.hops[0].pubkey,
@@ -5190,6 +5195,7 @@
 			},
 			HTLCSource::PreviousHopData(hop_data) => {
 				let prev_outpoint = hop_data.outpoint;
+				let completed_blocker = RAAMonitorUpdateBlockingAction::from_prev_hop_data(&hop_data);
 				let res = self.claim_funds_from_hop(hop_data, payment_preimage,
 					|htlc_claim_value_msat| {
 						if let Some(forwarded_htlc_value) = forwarded_htlc_value_msat {
@@ -5205,7 +5211,17 @@
 								next_channel_id: Some(next_channel_outpoint.to_channel_id()),
 								outbound_amount_forwarded_msat: forwarded_htlc_value_msat,
 							},
-							downstream_counterparty_and_funding_outpoint: None,
+							downstream_counterparty_and_funding_outpoint:
+								if let Some(node_id) = next_channel_counterparty_node_id {
+									Some((node_id, next_channel_outpoint, completed_blocker))
+								} else {
+									// We can only get `None` here if we are processing a
+									// `ChannelMonitor`-originated event, in which case we
+									// don't care about ensuring we wake the downstream
+									// channel's monitor updating - the channel is already
+									// closed.
+									None
+								},
 						})
 					} else { None }
 				});
@@ -6044,6 +6060,17 @@
 			hash_map::Entry::Occupied(mut chan_phase_entry) => {
 				if let ChannelPhase::Funded(chan) = chan_phase_entry.get_mut() {
 					let res = try_chan_phase_entry!(self, chan.update_fulfill_htlc(&msg), chan_phase_entry);
+					if let HTLCSource::PreviousHopData(prev_hop) = &res.0 {
+						peer_state.actions_blocking_raa_monitor_updates.entry(msg.channel_id)
+							.or_insert_with(Vec::new)
+							.push(RAAMonitorUpdateBlockingAction::from_prev_hop_data(&prev_hop));
+					}
+					// Note that we do not need to push an `actions_blocking_raa_monitor_updates`
+					// entry here, even though we *do* need to block the next RAA monitor update.
+					// We do this instead in the `claim_funds_internal` by attaching a
+					// `ReleaseRAAChannelMonitorUpdate` action to the event generated when the
+					// outbound HTLC is claimed. This is guaranteed to all complete before we
+					// process the RAA as messages are processed from single peers serially.
					funding_txo = chan.context.get_funding_txo().expect("We won't accept a fulfill until funded");
 					res
 				} else {
@@ -6054,7 +6081,7 @@
 			hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close(format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", counterparty_node_id), msg.channel_id))
 		}
 	};
-	self.claim_funds_internal(htlc_source, msg.payment_preimage.clone(), Some(forwarded_htlc_value), false, funding_txo);
+	self.claim_funds_internal(htlc_source, msg.payment_preimage.clone(), Some(forwarded_htlc_value), false, Some(*counterparty_node_id), funding_txo);
 	Ok(())
 }
 
@@ -6256,6 +6283,23 @@
 		})
 	}
 
+	#[cfg(any(test, feature = "_test_utils"))]
+	pub(crate) fn test_raa_monitor_updates_held(&self,
+		counterparty_node_id: PublicKey, channel_id: ChannelId
+	) -> bool {
+		let per_peer_state = self.per_peer_state.read().unwrap();
+		if let Some(peer_state_mtx) = per_peer_state.get(&counterparty_node_id) {
+			let mut peer_state_lck = peer_state_mtx.lock().unwrap();
+			let peer_state = &mut *peer_state_lck;
+
+			if let Some(chan) = peer_state.channel_by_id.get(&channel_id) {
+				return self.raa_monitor_updates_held(&peer_state.actions_blocking_raa_monitor_updates,
+					chan.context().get_funding_txo().unwrap(), counterparty_node_id);
+			}
+		}
+		false
+	}
+
 	fn internal_revoke_and_ack(&self, counterparty_node_id: &PublicKey, msg: &msgs::RevokeAndACK) -> Result<(), MsgHandleErrInternal> {
 		let (htlcs_to_fail, res) = {
 			let per_peer_state = self.per_peer_state.read().unwrap();
@@ -6477,8 +6521,8 @@
 				match monitor_event {
 					MonitorEvent::HTLCEvent(htlc_update) => {
 						if let Some(preimage) = htlc_update.payment_preimage {
-							log_trace!(self.logger, "Claiming HTLC with preimage {} from our monitor", &preimage);
-							self.claim_funds_internal(htlc_update.source, preimage, htlc_update.htlc_value_satoshis.map(|v| v * 1000), true, funding_outpoint);
+							log_trace!(self.logger, "Claiming HTLC with preimage {} from our monitor", preimage);
+							self.claim_funds_internal(htlc_update.source, preimage, htlc_update.htlc_value_satoshis.map(|v| v * 1000), true, counterparty_node_id, funding_outpoint);
 						} else {
 							log_trace!(self.logger, "Failing HTLC with hash {} from our monitor", &htlc_update.payment_hash);
 							let receiver = HTLCDestination::NextHopChannel { node_id: counterparty_node_id, channel_id: funding_outpoint.to_channel_id() };
@@ -9298,6 +9342,7 @@
 							// downstream chan is closed (because we don't have a
 							// channel_id -> peer map entry).
 							counterparty_opt.is_none(),
+							counterparty_opt.cloned().or(monitor.get_counterparty_node_id()),
 							monitor.get_funding_txo().0))
 					} else { None }
 				} else {
@@ -9576,12 +9621,12 @@
 		channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver);
 	}
 
-	for (source, preimage, downstream_value, downstream_closed, downstream_funding) in pending_claims_to_replay {
+	for (source, preimage, downstream_value, downstream_closed, downstream_node_id, downstream_funding) in pending_claims_to_replay {
 		// We use `downstream_closed` in place of `from_onchain` here just as a guess - we
 		// don't remember in the `ChannelMonitor` where we got a preimage from, but if the
 		// channel is closed we just assume that it probably came from an on-chain claim.
 		channel_manager.claim_funds_internal(source, preimage, Some(downstream_value),
-			downstream_closed, downstream_funding);
+			downstream_closed, downstream_node_id, downstream_funding);
 	}
 
 	//TODO: Broadcast channel update for closed channels, but only after we've made a
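
The per-channel blocker bookkeeping added above (`actions_blocking_raa_monitor_updates`, `RAAMonitorUpdateBlockingAction::from_prev_hop_data`, and the `raa_monitor_updates_held` gate) follows a register-check-release pattern. The sketch below shows that pattern in isolation; `ChannelId`, `BlockingAction`, and `PeerState` here are simplified stand-ins, not LDK's real types.

use std::collections::HashMap;

// `ChannelId` and `BlockingAction` are simplified stand-ins, not LDK's real types.
type ChannelId = [u8; 32];

#[derive(Clone, PartialEq, Debug)]
enum BlockingAction {
    // The upstream channel's preimage-bearing monitor update has not yet completed.
    ForwardedPaymentInboundClaim { htlc_id: u64 },
}

#[derive(Default)]
struct PeerState {
    actions_blocking_raa_monitor_updates: HashMap<ChannelId, Vec<BlockingAction>>,
}

impl PeerState {
    // Called when handling `update_fulfill_htlc`: remember that this channel's next
    // RAA monitor update must wait for the upstream claim to be persisted.
    fn block_raa_update(&mut self, channel_id: ChannelId, action: BlockingAction) {
        self.actions_blocking_raa_monitor_updates
            .entry(channel_id)
            .or_insert_with(Vec::new)
            .push(action);
    }

    // The gate consulted before releasing the channel's RAA monitor update.
    fn raa_monitor_updates_held(&self, channel_id: &ChannelId) -> bool {
        self.actions_blocking_raa_monitor_updates
            .get(channel_id)
            .map_or(false, |blockers| !blockers.is_empty())
    }

    // Called once the upstream (preimage-bearing) monitor update completes.
    fn complete_blocker(&mut self, channel_id: &ChannelId, action: &BlockingAction) {
        if let Some(blockers) = self.actions_blocking_raa_monitor_updates.get_mut(channel_id) {
            blockers.retain(|b| b != action);
        }
    }
}

fn main() {
    let mut peer_state = PeerState::default();
    let chan_id = [0u8; 32];
    let blocker = BlockingAction::ForwardedPaymentInboundClaim { htlc_id: 7 };

    peer_state.block_raa_update(chan_id, blocker.clone());
    assert!(peer_state.raa_monitor_updates_held(&chan_id));

    peer_state.complete_blocker(&chan_id, &blocker);
    assert!(!peer_state.raa_monitor_updates_held(&chan_id));
}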

lightning/src/ln/functional_test_utils.rs

Lines changed: 1 addition & 14 deletions
@@ -1772,20 +1772,7 @@ pub fn do_commitment_signed_dance(node_a: &Node<'_, '_, '_>, node_b: &Node<'_, '
 	check_added_monitors!(node_a, 1);
 
 	// If this commitment signed dance was due to a claim, don't check for an RAA monitor update.
-	let got_claim = node_a.node.pending_events.lock().unwrap().iter().any(|(ev, action)| {
-		let matching_action = if let Some(channelmanager::EventCompletionAction::ReleaseRAAChannelMonitorUpdate
-			{ channel_funding_outpoint, counterparty_node_id }) = action
-		{
-			if channel_funding_outpoint.to_channel_id() == commitment_signed.channel_id {
-				assert_eq!(*counterparty_node_id, node_b.node.get_our_node_id());
-				true
-			} else { false }
-		} else { false };
-		if matching_action {
-			if let Event::PaymentSent { .. } = ev {} else { panic!(); }
-		}
-		matching_action
-	});
+	let got_claim = node_a.node.test_raa_monitor_updates_held(node_b.node.get_our_node_id(), commitment_signed.channel_id);
 	if fail_backwards { assert!(!got_claim); }
 	commitment_signed_dance!(node_a, node_b, (), fail_backwards, true, false, got_claim);
 
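
With this change the test utility no longer inspects pending events directly; it simply asks the node whether RAA monitor updates are currently held for the channel and relaxes its monitor-update expectations accordingly. The stand-alone snippet below illustrates that decision only; the counts are a simplifying assumption about the dance, not the actual test utility code.

// Not the real test utilities: a rough illustration of the `got_claim` decision in
// `do_commitment_signed_dance`. When the node reports that RAA monitor updates are held
// for the channel (an upstream preimage write is still pending), the dance does not
// expect the final RAA-triggered monitor update to appear.
fn expected_monitor_updates_during_dance(raa_updates_held: bool) -> usize {
    // Assumption: one update when handling the counterparty's `commitment_signed`, plus
    // one when handling the final `revoke_and_ack`, unless the latter is deferred.
    if raa_updates_held { 1 } else { 2 }
}

fn main() {
    assert_eq!(expected_monitor_updates_during_dance(false), 2);
    assert_eq!(expected_monitor_updates_during_dance(true), 1);
}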
