Skip to content

Commit 9a36e4a

Browse files
committed
Delay RAA-after-next processing until PaymentSent is are handled
In 0ad1f4c we fixed a nasty bug where a failure to persist a `ChannelManager` faster than a `ChannelMonitor` could result in the loss of a `PaymentSent` event, eventually resulting in a `PaymentFailed` instead! As noted in that commit, there's still some risk, though its been substantially reduced - if we receive an `update_fulfill_htlc` message for an outbound payment, and persist the initial removal `ChannelMonitorUpdate`, then respond with our own `commitment_signed` + `revoke_and_ack`, followed by receiving our peer's final `revoke_and_ack`, and then persist the `ChannelMonitorUpdate` generated from that, all prior to completing a `ChannelManager` persistence, we'll still forget the HTLC and eventually trigger a `PaymentFailed` rather than the correct `PaymentSent`. Here we fully fix the issue by delaying the final `ChannelMonitorUpdate` persistence until the `PaymentSent` event has been processed and document the fact that a spurious `PaymentFailed` event can still be generated for a sent payment. The original fix in 0ad1f4c is still incredibly useful here, allowing us to avoid blocking the first `ChannelMonitorUpdate` until the event processing completes, as this would cause us to add event-processing delay in our general commitment update latency. Instead, we ultimately race the user handling the `PaymentSent` event with how long it takes our `revoke_and_ack` + `commitment_signed` to make it to our counterparty and receive the response `revoke_and_ack`. This should give the user plenty of time to handle the event before we need to make progress. Sadly, because we change our `ChannelMonitorUpdate` semantics, this change requires a number of test changes, avoiding checking for a post-RAA `ChannelMonitorUpdate` until after we process a `PaymentSent` event. Note that this does not apply to payments we learned the preimage for on-chain - ensuring `PaymentSent` events from such resolutions will be addressed in a future PR. Thus, tests which resolve payments on-chain switch to a direct call to the `expect_payment_sent` function with the claim-expected flag unset.
1 parent e94647c commit 9a36e4a

14 files changed

+326
-104
lines changed

lightning-invoice/src/utils.rs

+1-16
Original file line numberDiff line numberDiff line change
@@ -1336,22 +1336,7 @@ mod test {
13361336
let payment_preimage_opt = if user_generated_pmt_hash { None } else { Some(payment_preimage) };
13371337
expect_payment_claimable!(&nodes[fwd_idx], payment_hash, payment_secret, payment_amt, payment_preimage_opt, invoice.recover_payee_pub_key());
13381338
do_claim_payment_along_route(&nodes[0], &[&vec!(&nodes[fwd_idx])[..]], false, payment_preimage);
1339-
let events = nodes[0].node.get_and_clear_pending_events();
1340-
assert_eq!(events.len(), 2);
1341-
match events[0] {
1342-
Event::PaymentSent { payment_preimage: ref ev_preimage, payment_hash: ref ev_hash, ref fee_paid_msat, .. } => {
1343-
assert_eq!(payment_preimage, *ev_preimage);
1344-
assert_eq!(payment_hash, *ev_hash);
1345-
assert_eq!(fee_paid_msat, &Some(0));
1346-
},
1347-
_ => panic!("Unexpected event")
1348-
}
1349-
match events[1] {
1350-
Event::PaymentPathSuccessful { payment_hash: hash, .. } => {
1351-
assert_eq!(hash, Some(payment_hash));
1352-
},
1353-
_ => panic!("Unexpected event")
1354-
}
1339+
expect_payment_sent(&nodes[0], payment_preimage, None, true, true);
13551340
}
13561341

13571342
#[test]

lightning/src/chain/chainmonitor.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,7 @@ mod tests {
828828
use bitcoin::{BlockHeader, TxMerkleNode};
829829
use bitcoin::hashes::Hash;
830830
use crate::{check_added_monitors, check_closed_broadcast, check_closed_event};
831-
use crate::{expect_payment_sent, expect_payment_claimed, expect_payment_sent_without_paths, expect_payment_path_successful, get_event_msg};
831+
use crate::{expect_payment_claimed, expect_payment_path_successful, get_event_msg};
832832
use crate::{get_htlc_update_msgs, get_local_commitment_txn, get_revoke_commit_msgs, get_route_and_payment_hash, unwrap_send_err};
833833
use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Watch};
834834
use crate::chain::channelmonitor::LATENCY_GRACE_PERIOD_BLOCKS;
@@ -911,7 +911,7 @@ mod tests {
911911

912912
let updates = get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id());
913913
nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]);
914-
expect_payment_sent_without_paths!(nodes[0], payment_preimage_1);
914+
expect_payment_sent(&nodes[0], payment_preimage_1, None, false, false);
915915
nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &updates.commitment_signed);
916916
check_added_monitors!(nodes[0], 1);
917917
let (as_first_raa, as_first_update) = get_revoke_commit_msgs!(nodes[0], nodes[1].node.get_our_node_id());
@@ -924,7 +924,7 @@ mod tests {
924924
let bs_first_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id());
925925

926926
nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_second_updates.update_fulfill_htlcs[0]);
927-
expect_payment_sent_without_paths!(nodes[0], payment_preimage_2);
927+
expect_payment_sent(&nodes[0], payment_preimage_2, None, false, false);
928928
nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_second_updates.commitment_signed);
929929
check_added_monitors!(nodes[0], 1);
930930
nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_first_raa);
@@ -1013,7 +1013,7 @@ mod tests {
10131013
}
10141014
}
10151015

1016-
expect_payment_sent!(nodes[0], payment_preimage);
1016+
expect_payment_sent(&nodes[0], payment_preimage, None, true, false);
10171017
}
10181018

10191019
#[test]

lightning/src/events/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,11 @@ pub enum Event {
472472
/// payment is no longer retryable, due either to the [`Retry`] provided or
473473
/// [`ChannelManager::abandon_payment`] having been called for the corresponding payment.
474474
///
475+
/// In exceedingly rare cases, it is possible that an [`Event::PaymentFailed`] is generated for
476+
/// a payment after an [`Event::PaymentSent`] event for this same payment has already been
477+
/// received and processed. In this case, the [`Event::PaymentFailed`] event MUST be ignored,
478+
/// and the payment MUST be treated as having succeeded.
479+
///
475480
/// [`Retry`]: crate::ln::channelmanager::Retry
476481
/// [`ChannelManager::abandon_payment`]: crate::ln::channelmanager::ChannelManager::abandon_payment
477482
PaymentFailed {

lightning/src/ln/chanmon_update_fail_tests.rs

+72-7
Original file line numberDiff line numberDiff line change
@@ -1390,6 +1390,7 @@ fn claim_while_disconnected_monitor_update_fail() {
13901390
MessageSendEvent::UpdateHTLCs { ref node_id, ref updates } => {
13911391
assert_eq!(*node_id, nodes[0].node.get_our_node_id());
13921392
nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]);
1393+
expect_payment_sent(&nodes[0], payment_preimage_1, None, false, false);
13931394
nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &updates.commitment_signed);
13941395
check_added_monitors!(nodes[0], 1);
13951396

@@ -1427,7 +1428,7 @@ fn claim_while_disconnected_monitor_update_fail() {
14271428

14281429
nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_raa);
14291430
check_added_monitors!(nodes[0], 1);
1430-
expect_payment_sent!(nodes[0], payment_preimage_1);
1431+
expect_payment_path_successful!(nodes[0]);
14311432

14321433
claim_payment(&nodes[0], &[&nodes[1]], payment_preimage_2);
14331434
}
@@ -2173,7 +2174,7 @@ fn test_fail_htlc_on_broadcast_after_claim() {
21732174
expect_pending_htlcs_forwardable_and_htlc_handling_failed!(nodes[1], vec![HTLCDestination::NextHopChannel { node_id: Some(nodes[2].node.get_our_node_id()), channel_id: chan_id_2 }]);
21742175

21752176
nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.update_fulfill_htlcs[0]);
2176-
expect_payment_sent_without_paths!(nodes[0], payment_preimage);
2177+
expect_payment_sent(&nodes[0], payment_preimage, None, false, false);
21772178
commitment_signed_dance!(nodes[0], nodes[1], bs_updates.commitment_signed, true, true);
21782179
expect_payment_path_successful!(nodes[0]);
21792180
}
@@ -2411,7 +2412,7 @@ fn do_channel_holding_cell_serialize(disconnect: bool, reload_a: bool) {
24112412
assert!(updates.update_fee.is_none());
24122413
assert_eq!(updates.update_fulfill_htlcs.len(), 1);
24132414
nodes[1].node.handle_update_fulfill_htlc(&nodes[0].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]);
2414-
expect_payment_sent_without_paths!(nodes[1], payment_preimage_0);
2415+
expect_payment_sent(&nodes[1], payment_preimage_0, None, false, false);
24152416
assert_eq!(updates.update_add_htlcs.len(), 1);
24162417
nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &updates.update_add_htlcs[0]);
24172418
updates.commitment_signed
@@ -2428,7 +2429,7 @@ fn do_channel_holding_cell_serialize(disconnect: bool, reload_a: bool) {
24282429
expect_payment_claimable!(nodes[1], payment_hash_1, payment_secret_1, 100000);
24292430
check_added_monitors!(nodes[1], 1);
24302431

2431-
commitment_signed_dance!(nodes[1], nodes[0], (), false, true, false);
2432+
commitment_signed_dance!(nodes[1], nodes[0], (), false, true, false, false);
24322433

24332434
let events = nodes[1].node.get_and_clear_pending_events();
24342435
assert_eq!(events.len(), 2);
@@ -2529,7 +2530,7 @@ fn do_test_reconnect_dup_htlc_claims(htlc_status: HTLCStatusAtDupClaim, second_f
25292530
bs_updates = Some(get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id()));
25302531
assert_eq!(bs_updates.as_ref().unwrap().update_fulfill_htlcs.len(), 1);
25312532
nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.as_ref().unwrap().update_fulfill_htlcs[0]);
2532-
expect_payment_sent_without_paths!(nodes[0], payment_preimage);
2533+
expect_payment_sent(&nodes[0], payment_preimage, None, false, false);
25332534
if htlc_status == HTLCStatusAtDupClaim::Cleared {
25342535
commitment_signed_dance!(nodes[0], nodes[1], &bs_updates.as_ref().unwrap().commitment_signed, false);
25352536
expect_payment_path_successful!(nodes[0]);
@@ -2556,7 +2557,7 @@ fn do_test_reconnect_dup_htlc_claims(htlc_status: HTLCStatusAtDupClaim, second_f
25562557
bs_updates = Some(get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id()));
25572558
assert_eq!(bs_updates.as_ref().unwrap().update_fulfill_htlcs.len(), 1);
25582559
nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.as_ref().unwrap().update_fulfill_htlcs[0]);
2559-
expect_payment_sent_without_paths!(nodes[0], payment_preimage);
2560+
expect_payment_sent(&nodes[0], payment_preimage, None, false, false);
25602561
}
25612562
if htlc_status != HTLCStatusAtDupClaim::Cleared {
25622563
commitment_signed_dance!(nodes[0], nodes[1], &bs_updates.as_ref().unwrap().commitment_signed, false);
@@ -2753,7 +2754,7 @@ fn double_temp_error() {
27532754
assert_eq!(node_id, nodes[0].node.get_our_node_id());
27542755
nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &update_fulfill_1);
27552756
check_added_monitors!(nodes[0], 0);
2756-
expect_payment_sent_without_paths!(nodes[0], payment_preimage_1);
2757+
expect_payment_sent(&nodes[0], payment_preimage_1, None, false, false);
27572758
nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &commitment_signed_b1);
27582759
check_added_monitors!(nodes[0], 1);
27592760
nodes[0].node.process_pending_htlc_forwards();
@@ -2978,3 +2979,67 @@ fn test_inbound_reload_without_init_mon() {
29782979
do_test_inbound_reload_without_init_mon(false, true);
29792980
do_test_inbound_reload_without_init_mon(false, false);
29802981
}
2982+
2983+
#[test]
2984+
fn test_blocked_chan_preimage_release() {
2985+
// Test that even if a channel's `ChannelMonitorUpdate` flow is blocked waiting on an event to
2986+
// be handled HTLC preimage `ChannelMonitorUpdate`s will still go out.
2987+
let chanmon_cfgs = create_chanmon_cfgs(3);
2988+
let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
2989+
let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
2990+
let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
2991+
2992+
create_announced_chan_between_nodes(&nodes, 0, 1).2;
2993+
create_announced_chan_between_nodes(&nodes, 1, 2).2;
2994+
2995+
send_payment(&nodes[0], &[&nodes[1], &nodes[2]], 5_000_000);
2996+
2997+
// Tee up two payments in opposite directions across nodes[1], one it sent to generate a
2998+
// PaymentSent event and one it forwards.
2999+
let (payment_preimage_1, payment_hash_1, _) = route_payment(&nodes[1], &[&nodes[2]], 1_000_000);
3000+
let (payment_preimage_2, payment_hash_2, _) = route_payment(&nodes[2], &[&nodes[1], &nodes[0]], 1_000_000);
3001+
3002+
// Claim the first payment to get a `PaymentSent` event (but don't handle it yet).
3003+
nodes[2].node.claim_funds(payment_preimage_1);
3004+
check_added_monitors(&nodes[2], 1);
3005+
expect_payment_claimed!(nodes[2], payment_hash_1, 1_000_000);
3006+
3007+
let cs_htlc_fulfill_updates = get_htlc_update_msgs!(nodes[2], nodes[1].node.get_our_node_id());
3008+
nodes[1].node.handle_update_fulfill_htlc(&nodes[2].node.get_our_node_id(), &cs_htlc_fulfill_updates.update_fulfill_htlcs[0]);
3009+
commitment_signed_dance!(nodes[1], nodes[2], cs_htlc_fulfill_updates.commitment_signed, false);
3010+
check_added_monitors(&nodes[1], 0);
3011+
3012+
// Now claim the second payment on nodes[0], which will ultimately result in nodes[1] trying to
3013+
// claim an HTLC on its channel with nodes[2], but that channel is blocked on the above
3014+
// `PaymentSent` event.
3015+
nodes[0].node.claim_funds(payment_preimage_2);
3016+
check_added_monitors(&nodes[0], 1);
3017+
expect_payment_claimed!(nodes[0], payment_hash_2, 1_000_000);
3018+
3019+
let as_htlc_fulfill_updates = get_htlc_update_msgs!(nodes[0], nodes[1].node.get_our_node_id());
3020+
nodes[1].node.handle_update_fulfill_htlc(&nodes[0].node.get_our_node_id(), &as_htlc_fulfill_updates.update_fulfill_htlcs[0]);
3021+
check_added_monitors(&nodes[1], 1); // We generate only a preimage monitor update
3022+
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
3023+
3024+
// Finish the CS dance between nodes[0] and nodes[1].
3025+
commitment_signed_dance!(nodes[1], nodes[0], as_htlc_fulfill_updates.commitment_signed, false);
3026+
check_added_monitors(&nodes[1], 0);
3027+
3028+
let events = nodes[1].node.get_and_clear_pending_events();
3029+
assert_eq!(events.len(), 3);
3030+
if let Event::PaymentSent { .. } = events[0] {} else { panic!(); }
3031+
if let Event::PaymentPathSuccessful { .. } = events[2] {} else { panic!(); }
3032+
if let Event::PaymentForwarded { .. } = events[1] {} else { panic!(); }
3033+
3034+
// The event processing should release the last RAA update.
3035+
check_added_monitors(&nodes[1], 1);
3036+
3037+
// When we fetch the next update the message getter will generate the next update for nodes[2],
3038+
// generating a further monitor update.
3039+
let bs_htlc_fulfill_updates = get_htlc_update_msgs!(nodes[1], nodes[2].node.get_our_node_id());
3040+
check_added_monitors(&nodes[1], 1);
3041+
3042+
nodes[2].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_htlc_fulfill_updates.update_fulfill_htlcs[0]);
3043+
commitment_signed_dance!(nodes[2], nodes[1], bs_htlc_fulfill_updates.commitment_signed, false);
3044+
expect_payment_sent(&nodes[2], payment_preimage_2, None, true, true);
3045+
}

lightning/src/ln/channel.rs

+37-8
Original file line numberDiff line numberDiff line change
@@ -3467,7 +3467,8 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
34673467
/// waiting on this revoke_and_ack. The generation of this new commitment_signed may also fail,
34683468
/// generating an appropriate error *after* the channel state has been updated based on the
34693469
/// revoke_and_ack message.
3470-
pub fn revoke_and_ack<L: Deref>(&mut self, msg: &msgs::RevokeAndACK, logger: &L) -> Result<(Vec<(HTLCSource, PaymentHash)>, Option<&ChannelMonitorUpdate>), ChannelError>
3470+
pub fn revoke_and_ack<L: Deref>(&mut self, msg: &msgs::RevokeAndACK, logger: &L, hold_mon_update: bool)
3471+
-> Result<(Vec<(HTLCSource, PaymentHash)>, Option<&ChannelMonitorUpdate>), ChannelError>
34713472
where L::Target: Logger,
34723473
{
34733474
if (self.channel_state & (ChannelState::ChannelReady as u32)) != (ChannelState::ChannelReady as u32) {
@@ -3646,6 +3647,10 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
36463647
}
36473648
}
36483649

3650+
let release_monitor = self.pending_monitor_updates.iter().all(|upd| !upd.blocked) && !hold_mon_update;
3651+
let release_state_str =
3652+
if hold_mon_update { "Holding" } else if release_monitor { "Releasing" } else { "Blocked" };
3653+
36493654
if (self.channel_state & ChannelState::MonitorUpdateInProgress as u32) == ChannelState::MonitorUpdateInProgress as u32 {
36503655
// We can't actually generate a new commitment transaction (incl by freeing holding
36513656
// cells) while we can't update the monitor, so we just return what we have.
@@ -3664,7 +3669,11 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
36643669
self.monitor_pending_failures.append(&mut revoked_htlcs);
36653670
self.monitor_pending_finalized_fulfills.append(&mut finalized_claimed_htlcs);
36663671
log_debug!(logger, "Received a valid revoke_and_ack for channel {} but awaiting a monitor update resolution to reply.", log_bytes!(self.channel_id()));
3667-
return Ok((Vec::new(), self.push_ret_blockable_mon_update(monitor_update)));
3672+
self.pending_monitor_updates.push(PendingChannelMonitorUpdate {
3673+
update: monitor_update, blocked: !release_monitor,
3674+
});
3675+
return Ok((Vec::new(),
3676+
if release_monitor { self.pending_monitor_updates.last().map(|upd| &upd.update) } else { None }));
36683677
}
36693678

36703679
match self.free_holding_cell_htlcs(logger) {
@@ -3675,8 +3684,15 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
36753684
self.latest_monitor_update_id = monitor_update.update_id;
36763685
monitor_update.updates.append(&mut additional_update.updates);
36773686

3687+
log_debug!(logger, "Received a valid revoke_and_ack for channel {} with holding cell HTLCs freed. {} monitor update.",
3688+
log_bytes!(self.channel_id()), release_state_str);
3689+
36783690
self.monitor_updating_paused(false, true, false, to_forward_infos, revoked_htlcs, finalized_claimed_htlcs);
3679-
Ok((htlcs_to_fail, self.push_ret_blockable_mon_update(monitor_update)))
3691+
self.pending_monitor_updates.push(PendingChannelMonitorUpdate {
3692+
update: monitor_update, blocked: !release_monitor,
3693+
});
3694+
Ok((htlcs_to_fail,
3695+
if release_monitor { self.pending_monitor_updates.last().map(|upd| &upd.update) } else { None }))
36803696
},
36813697
(None, htlcs_to_fail) => {
36823698
if require_commitment {
@@ -3687,14 +3703,27 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
36873703
self.latest_monitor_update_id = monitor_update.update_id;
36883704
monitor_update.updates.append(&mut additional_update.updates);
36893705

3690-
log_debug!(logger, "Received a valid revoke_and_ack for channel {}. Responding with a commitment update with {} HTLCs failed.",
3691-
log_bytes!(self.channel_id()), update_fail_htlcs.len() + update_fail_malformed_htlcs.len());
3706+
log_debug!(logger, "Received a valid revoke_and_ack for channel {}. Responding with a commitment update with {} HTLCs failed. {} monitor update.",
3707+
log_bytes!(self.channel_id()),
3708+
update_fail_htlcs.len() + update_fail_malformed_htlcs.len(),
3709+
release_state_str);
3710+
36923711
self.monitor_updating_paused(false, true, false, to_forward_infos, revoked_htlcs, finalized_claimed_htlcs);
3693-
Ok((htlcs_to_fail, self.push_ret_blockable_mon_update(monitor_update)))
3712+
self.pending_monitor_updates.push(PendingChannelMonitorUpdate {
3713+
update: monitor_update, blocked: !release_monitor,
3714+
});
3715+
Ok((htlcs_to_fail,
3716+
if release_monitor { self.pending_monitor_updates.last().map(|upd| &upd.update) } else { None }))
36943717
} else {
3695-
log_debug!(logger, "Received a valid revoke_and_ack for channel {} with no reply necessary.", log_bytes!(self.channel_id()));
3718+
log_debug!(logger, "Received a valid revoke_and_ack for channel {} with no reply necessary. {} monitor update.",
3719+
log_bytes!(self.channel_id()), release_state_str);
3720+
36963721
self.monitor_updating_paused(false, false, false, to_forward_infos, revoked_htlcs, finalized_claimed_htlcs);
3697-
Ok((htlcs_to_fail, self.push_ret_blockable_mon_update(monitor_update)))
3722+
self.pending_monitor_updates.push(PendingChannelMonitorUpdate {
3723+
update: monitor_update, blocked: !release_monitor,
3724+
});
3725+
Ok((htlcs_to_fail,
3726+
if release_monitor { self.pending_monitor_updates.last().map(|upd| &upd.update) } else { None }))
36983727
}
36993728
}
37003729
}

0 commit comments

Comments
 (0)