Skip to content

Commit 5d411aa

Browse files
committed
Delay removal of fulfilled outbound payments for a few timer ticks
Previously, once a fulfilled outbound payment completed and all associated HTLCs were resolved, we'd immediately remove the payment entry from the `pending_outbound_payments` map. Now that we're using the `pending_outbound_payments` map for send idempotency, this presents a race condition - if the user makes a redundant `send_payment` call at the same time that the original payment's last HTLC is resolved, the user would reasonably expect the `send_payment` call to fail due to our idempotency guarantees. However, because the `pending_outbound_payments` entry is being removed, if it completes first the `send_payment` call will succeed even though the user has not had a chance to see the corresponding `Event::PaymentSent`. Instead, here, we delay removal of `Fulfilled` `pending_outbound_payments` entries until several timer ticks have passed without any corresponding event or HTLC pending.
1 parent 9f8783c commit 5d411aa

File tree

2 files changed

+122
-9
lines changed

2 files changed

+122
-9
lines changed

lightning/src/ln/channelmanager.rs

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,7 @@ pub(crate) enum PendingOutboundPayment {
473473
Fulfilled {
474474
session_privs: HashSet<[u8; 32]>,
475475
payment_hash: Option<PaymentHash>,
476+
timer_ticks_without_htlcs: u8,
476477
},
477478
/// When a payer gives up trying to retry a payment, they inform us, letting us generate a
478479
/// `PaymentFailed` event when all HTLCs have irrevocably failed. This avoids a number of race
@@ -532,7 +533,7 @@ impl PendingOutboundPayment {
532533
=> session_privs,
533534
});
534535
let payment_hash = self.payment_hash();
535-
*self = PendingOutboundPayment::Fulfilled { session_privs, payment_hash };
536+
*self = PendingOutboundPayment::Fulfilled { session_privs, payment_hash, timer_ticks_without_htlcs: 0 };
536537
}
537538

538539
fn mark_abandoned(&mut self) -> Result<(), ()> {
@@ -966,6 +967,11 @@ pub(crate) const PAYMENT_EXPIRY_BLOCKS: u32 = 3;
966967
/// The number of ticks of [`ChannelManager::timer_tick_occurred`] until expiry of incomplete MPPs
967968
pub(crate) const MPP_TIMEOUT_TICKS: u8 = 3;
968969

970+
/// The number of ticks of [`ChannelManager::timer_tick_occurred`] until we time-out the
971+
/// idempotency of payments by [`PaymentId`]. See
972+
/// [`ChannelManager::remove_stale_resolved_payments`].
973+
pub(crate) const IDEMPOTENCY_TIMEOUT_TICKS: u8 = 7;
974+
969975
/// Information needed for constructing an invoice route hint for this channel.
970976
#[derive(Clone, Debug, PartialEq)]
971977
pub struct CounterpartyForwardingInfo {
@@ -3609,6 +3615,45 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
36093615
});
36103616
}
36113617

3618+
fn remove_stale_resolved_payments(&self) {
3619+
// If an outbound payment was completed, and no pending HTLCs remain, we should remove it
3620+
// from the map. However, if we did that immediately when the last payment HTLC is claimed,
3621+
// this could race the user making a duplicate payment call (and relying on our idempotency
3622+
// guarantees on the PaymentId to avoid actually sending). Instead, we wait a few timer
3623+
// ticks to do the actual removal. This should be more than sufficient to ensure any
3624+
// `send_payment` calls that were made at the same time the `PaymentSent` event was being
3625+
// processed complete.
3626+
let mut pending_outbound_payments = self.pending_outbound_payments.lock().unwrap();
3627+
let pending_events = self.pending_events.lock().unwrap();
3628+
pending_outbound_payments.retain(|payment_id, payment| {
3629+
if let PendingOutboundPayment::Fulfilled { session_privs, timer_ticks_without_htlcs, .. } = payment {
3630+
let mut no_remaining_entries = session_privs.is_empty();
3631+
if no_remaining_entries {
3632+
for ev in pending_events.iter() {
3633+
match ev {
3634+
events::Event::PaymentSent { payment_id: Some(ev_payment_id), .. }|
3635+
events::Event::PaymentPathSuccessful { payment_id: ev_payment_id, .. }|
3636+
events::Event::PaymentPathFailed { payment_id: Some(ev_payment_id), .. } => {
3637+
if payment_id == ev_payment_id {
3638+
no_remaining_entries = false;
3639+
break;
3640+
}
3641+
},
3642+
_ => {},
3643+
}
3644+
}
3645+
}
3646+
if no_remaining_entries {
3647+
*timer_ticks_without_htlcs += 1;
3648+
*timer_ticks_without_htlcs <= IDEMPOTENCY_TIMEOUT_TICKS
3649+
} else {
3650+
*timer_ticks_without_htlcs = 0;
3651+
true
3652+
}
3653+
} else { true }
3654+
});
3655+
}
3656+
36123657
/// Performs actions which should happen on startup and roughly once per minute thereafter.
36133658
///
36143659
/// This currently includes:
@@ -3712,6 +3757,9 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
37123757
for (err, counterparty_node_id) in handle_errors.drain(..) {
37133758
let _ = handle_error!(self, err, counterparty_node_id);
37143759
}
3760+
3761+
self.remove_stale_resolved_payments();
3762+
37153763
should_persist
37163764
});
37173765
}
@@ -4229,9 +4277,6 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
42294277
}
42304278
);
42314279
}
4232-
if payment.get().remaining_parts() == 0 {
4233-
payment.remove();
4234-
}
42354280
}
42364281
}
42374282
}
@@ -4277,10 +4322,6 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
42774322
}
42784323
);
42794324
}
4280-
4281-
if payment.get().remaining_parts() == 0 {
4282-
payment.remove();
4283-
}
42844325
}
42854326
} else {
42864327
log_trace!(self.logger, "Received duplicative fulfill for HTLC with payment_preimage {}", log_bytes!(payment_preimage.0));
@@ -6605,6 +6646,7 @@ impl_writeable_tlv_based_enum_upgradable!(PendingOutboundPayment,
66056646
(1, Fulfilled) => {
66066647
(0, session_privs, required),
66076648
(1, payment_hash, option),
6649+
(false, timer_ticks_without_htlcs, (reset_on_reload, 0)),
66086650
},
66096651
(2, Retryable) => {
66106652
(0, session_privs, required),

lightning/src/ln/payment_tests.rs

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use crate::chain::channelmonitor::{ANTI_REORG_DELAY, ChannelMonitor, LATENCY_GRA
1616
use crate::chain::transaction::OutPoint;
1717
use crate::chain::keysinterface::KeysInterface;
1818
use crate::ln::channel::EXPIRE_PREV_CONFIG_TICKS;
19-
use crate::ln::channelmanager::{self, BREAKDOWN_TIMEOUT, ChannelManager, ChannelManagerReadArgs, MPP_TIMEOUT_TICKS, MIN_CLTV_EXPIRY_DELTA, PaymentId, PaymentSendFailure};
19+
use crate::ln::channelmanager::{self, BREAKDOWN_TIMEOUT, ChannelManager, ChannelManagerReadArgs, MPP_TIMEOUT_TICKS, MIN_CLTV_EXPIRY_DELTA, PaymentId, PaymentSendFailure, IDEMPOTENCY_TIMEOUT_TICKS};
2020
use crate::ln::msgs;
2121
use crate::ln::msgs::ChannelMessageHandler;
2222
use crate::routing::router::{PaymentParameters, get_route};
@@ -1255,3 +1255,74 @@ fn onchain_failed_probe_yields_event() {
12551255
}
12561256
assert!(found_probe_failed);
12571257
}
1258+
1259+
#[test]
1260+
fn claimed_send_payment_idempotent() {
1261+
// Tests that `send_payment` (and friends) are (reasonably) idempotent.
1262+
let chanmon_cfgs = create_chanmon_cfgs(2);
1263+
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
1264+
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
1265+
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
1266+
1267+
create_announced_chan_between_nodes(&nodes, 0, 1, channelmanager::provided_init_features(), channelmanager::provided_init_features()).2;
1268+
1269+
let (route, second_payment_hash, second_payment_preimage, second_payment_secret) = get_route_and_payment_hash!(nodes[0], nodes[1], 100_000);
1270+
let (first_payment_preimage, _, _, payment_id) = send_along_route(&nodes[0], route.clone(), &[&nodes[1]], 100_000);
1271+
1272+
macro_rules! check_send_rejected {
1273+
() => {
1274+
// If we try to resend a new payment with a different payment_hash but with the same
1275+
// payment_id, it should be rejected.
1276+
let send_result = nodes[0].node.send_payment(&route, second_payment_hash, &Some(second_payment_secret), payment_id);
1277+
match send_result {
1278+
Err(PaymentSendFailure::ParameterError(APIError::RouteError { err: "Payment already in progress" })) => {},
1279+
_ => panic!("Unexpected send result: {:?}", send_result),
1280+
}
1281+
1282+
// Further, if we try to send a spontaneous payment with the same payment_id it should
1283+
// also be rejected.
1284+
let send_result = nodes[0].node.send_spontaneous_payment(&route, None, payment_id);
1285+
match send_result {
1286+
Err(PaymentSendFailure::ParameterError(APIError::RouteError { err: "Payment already in progress" })) => {},
1287+
_ => panic!("Unexpected send result: {:?}", send_result),
1288+
}
1289+
}
1290+
}
1291+
1292+
check_send_rejected!();
1293+
1294+
// Claim the payment backwards, but note that the PaymentSent event is still pending and has
1295+
// not been seen by the user. At this point, from the user perspective nothing has changed, so
1296+
// we must remain just as idempotent as we were before.
1297+
do_claim_payment_along_route(&nodes[0], &[&[&nodes[1]]], false, first_payment_preimage);
1298+
1299+
for _ in 0..=IDEMPOTENCY_TIMEOUT_TICKS {
1300+
nodes[0].node.timer_tick_occurred();
1301+
}
1302+
1303+
check_send_rejected!();
1304+
1305+
// Once the user sees and handles the `PaymentSent` event, we expect them to no longer call
1306+
// `send_payment`, and our idempotency guarantees are off - they should have atomically marked
1307+
// the payment complete. However, they could have called `send_payment` while the event was
1308+
// being processed, leading to a race in our idempotency guarantees. Thus, even immediately
1309+
// after the event is handled a duplicate payment should sitll be rejected.
1310+
expect_payment_sent!(&nodes[0], first_payment_preimage, Some(0));
1311+
check_send_rejected!();
1312+
1313+
// If relatively little time has passed, a duplicate payment should still fail.
1314+
nodes[0].node.timer_tick_occurred();
1315+
check_send_rejected!();
1316+
1317+
// However, after some time has passed (at least more than one timer tick), a duplicate payment
1318+
// should go through, as ChannelManager should no longer have any remaining references to the
1319+
// old payment data.
1320+
for _ in 0..IDEMPOTENCY_TIMEOUT_TICKS {
1321+
nodes[0].node.timer_tick_occurred();
1322+
}
1323+
1324+
nodes[0].node.send_payment(&route, second_payment_hash, &Some(second_payment_secret), payment_id).unwrap();
1325+
check_added_monitors!(nodes[0], 1);
1326+
pass_along_route(&nodes[0], &[&[&nodes[1]]], 100_000, second_payment_hash, second_payment_secret);
1327+
claim_payment(&nodes[0], &[&nodes[1]], second_payment_preimage);
1328+
}

0 commit comments

Comments
 (0)