Skip to content

Commit 1367c88

Browse files
committed
f maybe fix test on ci?
1 parent c6bbb99 commit 1367c88

File tree

1 file changed

+152
-77
lines changed

1 file changed

+152
-77
lines changed

lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 152 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -3862,53 +3862,82 @@ fn test_claim_to_closed_channel_blocks_claimed_event() {
38623862
}
38633863

38643864
#[test]
3865-
#[cfg(feature = "std")]
3865+
#[cfg(all(feature = "std", not(target_os = "windows")))]
38663866
fn test_single_channel_multiple_mpp() {
3867+
use std::sync::atomic::{AtomicBool, Ordering};
3868+
38673869
// Test what happens when we attempt to claim an MPP with many parts that came to us through
38683870
// the same channel with a synchronous persistence interface which has very high latency.
38693871
//
38703872
// Previously, if a `revoke_and_ack` came in while we were still running in
38713873
// `ChannelManager::claim_payment` we'd end up hanging waiting to apply a
38723874
// `ChannelMonitorUpdate` until after it completed. See the commit which introduced this test
38733875
// for more info.
3874-
let chanmon_cfgs = create_chanmon_cfgs(7);
3875-
let node_cfgs = create_node_cfgs(7, &chanmon_cfgs);
3876-
let node_chanmgrs = create_node_chanmgrs(7, &node_cfgs, &[None; 7]);
3877-
let mut nodes = create_network(7, &node_cfgs, &node_chanmgrs);
3878-
3879-
let node_5_id = nodes[5].node.get_our_node_id();
3880-
let node_6_id = nodes[6].node.get_our_node_id();
3881-
3882-
// Send an MPP payment in four parts along the path shown from top to bottom
3883-
// 0
3884-
// 1 2 3 4
3885-
// 5
3886-
// 6
3876+
let chanmon_cfgs = create_chanmon_cfgs(9);
3877+
let node_cfgs = create_node_cfgs(9, &chanmon_cfgs);
3878+
let node_chanmgrs = create_node_chanmgrs(9, &node_cfgs, &[None; 9]);
3879+
let mut nodes = create_network(9, &node_cfgs, &node_chanmgrs);
3880+
3881+
let node_7_id = nodes[7].node.get_our_node_id();
3882+
let node_8_id = nodes[8].node.get_our_node_id();
3883+
3884+
// Send an MPP payment in six parts along the path shown from top to bottom
3885+
// 0
3886+
// 1 2 3 4 5 6
3887+
// 7
3888+
// 8
3889+
//
3890+
// We can in theory reproduce this issue with fewer channels/HTLCs, but getting this test
3891+
// robust is rather challenging. We rely on having the main test thread wait on locks held in
3892+
// the background `claim_funds` thread and unlocking when the `claim_funds` thread completes a
3893+
// single `ChannelMonitorUpdate`.
3894+
// This thread calls `get_and_clear_pending_msg_events()` and `handle_revoke_and_ack()`, both
3895+
// of which require `ChannelManager` locks, but we have to make sure this thread gets a chance
3896+
// to be blocked on the mutexes before we let the background thread wake `claim_funds` so that
3897+
// the mutex can switch to this main thread.
3898+
// This relies on our locks being fair, but also on our threads getting runtime during the test
3899+
// run, which can be pretty competitive. Thus we do a dumb dance to be as conservative as
3900+
// possible - we have a background thread which completes a `ChannelMonitorUpdate` (by sending
3901+
// into the `write_blocker` mpsc) but it doesn't run until a mpsc channel sends from this main
3902+
// thread to the background thread, and then we let it sleep a while before we send the
3903+
// `ChannelMonitorUpdate` unblocker.
3904+
// Further, we give ourselves two chances each time, needing 4 HTLCs just to unlock our two
3905+
// `ChannelManager` calls. We then need a few remaining HTLCs to actually trigger the bug, so
3906+
// we use 6 HTLCs.
3907+
// Finally, we do not run this test on Windows because its thread scheduling does not
3908+
// reliably provide the preemptive multitasking guarantees that this test's
3909+
// thread-coordination dance depends on.
3910+
const MAX_THREAD_INIT_TIME: std::time::Duration = std::time::Duration::from_secs(1);
38873911

38883912
create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 100_000, 0);
38893913
create_announced_chan_between_nodes_with_value(&nodes, 0, 2, 100_000, 0);
38903914
create_announced_chan_between_nodes_with_value(&nodes, 0, 3, 100_000, 0);
38913915
create_announced_chan_between_nodes_with_value(&nodes, 0, 4, 100_000, 0);
3892-
create_announced_chan_between_nodes_with_value(&nodes, 1, 5, 100_000, 0);
3893-
create_announced_chan_between_nodes_with_value(&nodes, 2, 5, 100_000, 0);
3894-
create_announced_chan_between_nodes_with_value(&nodes, 3, 5, 100_000, 0);
3895-
create_announced_chan_between_nodes_with_value(&nodes, 4, 5, 100_000, 0);
3896-
create_announced_chan_between_nodes_with_value(&nodes, 5, 6, 1_000_000, 0);
3916+
create_announced_chan_between_nodes_with_value(&nodes, 0, 5, 100_000, 0);
3917+
create_announced_chan_between_nodes_with_value(&nodes, 0, 6, 100_000, 0);
38973918

3898-
let (mut route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash!(&nodes[0], nodes[6], 30_000_000);
3919+
create_announced_chan_between_nodes_with_value(&nodes, 1, 7, 100_000, 0);
3920+
create_announced_chan_between_nodes_with_value(&nodes, 2, 7, 100_000, 0);
3921+
create_announced_chan_between_nodes_with_value(&nodes, 3, 7, 100_000, 0);
3922+
create_announced_chan_between_nodes_with_value(&nodes, 4, 7, 100_000, 0);
3923+
create_announced_chan_between_nodes_with_value(&nodes, 5, 7, 100_000, 0);
3924+
create_announced_chan_between_nodes_with_value(&nodes, 6, 7, 100_000, 0);
3925+
create_announced_chan_between_nodes_with_value(&nodes, 7, 8, 1_000_000, 0);
38993926

3900-
send_along_route_with_secret(&nodes[0], route, &[&[&nodes[1], &nodes[5], &nodes[6]], &[&nodes[2], &nodes[5], &nodes[6]], &[&nodes[3], &nodes[5], &nodes[6]], &[&nodes[4], &nodes[5], &nodes[6]]], 30_000_000, payment_hash, payment_secret);
3927+
let (mut route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash!(&nodes[0], nodes[8], 50_000_000);
3928+
3929+
send_along_route_with_secret(&nodes[0], route, &[&[&nodes[1], &nodes[7], &nodes[8]], &[&nodes[2], &nodes[7], &nodes[8]], &[&nodes[3], &nodes[7], &nodes[8]], &[&nodes[4], &nodes[7], &nodes[8]], &[&nodes[5], &nodes[7], &nodes[8]], &[&nodes[6], &nodes[7], &nodes[8]]], 50_000_000, payment_hash, payment_secret);
39013930

39023931
let (do_a_write, blocker) = std::sync::mpsc::sync_channel(0);
3903-
*nodes[6].chain_monitor.write_blocker.lock().unwrap() = Some(blocker);
3932+
*nodes[8].chain_monitor.write_blocker.lock().unwrap() = Some(blocker);
39043933

39053934
// Until we have std::thread::scoped we have to unsafe { turn off the borrow checker }.
39063935
// We do this by casting a pointer to a `TestChannelManager` to a pointer to a
39073936
// `TestChannelManager` with different (in this case 'static) lifetime.
39083937
// This is even suggested in the second example at
39093938
// https://doc.rust-lang.org/std/mem/fn.transmute.html#examples
39103939
let claim_node: &'static TestChannelManager<'static, 'static> =
3911-
unsafe { std::mem::transmute(nodes[6].node as &TestChannelManager) };
3940+
unsafe { std::mem::transmute(nodes[8].node as &TestChannelManager) };
39123941
let thrd = std::thread::spawn(move || {
39133942
// Initiate the claim in a background thread as it will immediately block waiting on the
39143943
// `write_blocker` we set above.
@@ -3924,85 +3953,131 @@ fn test_single_channel_multiple_mpp() {
39243953
// `claim_funds` is holding. Thus, we release a second write after a small sleep in the
39253954
// background to give `claim_funds` a chance to step forward, unblocking
39263955
// `get_and_clear_pending_msg_events`.
3927-
const MAX_THREAD_INIT_TIME: std::time::Duration = std::time::Duration::from_millis(10);
39283956
let do_a_write_background = do_a_write.clone();
3957+
let block_thrd2 = AtomicBool::new(true);
3958+
let block_thrd2_read: &'static AtomicBool = unsafe { std::mem::transmute(&block_thrd2) };
39293959
let thrd2 = std::thread::spawn(move || {
3960+
while block_thrd2_read.load(Ordering::Acquire) {
3961+
std::thread::yield_now();
3962+
}
3963+
std::thread::sleep(MAX_THREAD_INIT_TIME);
3964+
do_a_write_background.send(()).unwrap();
39303965
std::thread::sleep(MAX_THREAD_INIT_TIME);
39313966
do_a_write_background.send(()).unwrap();
39323967
});
3933-
let first_updates = get_htlc_update_msgs(&nodes[6], &nodes[5].node.get_our_node_id());
3968+
block_thrd2.store(false, Ordering::Release);
3969+
let first_updates = get_htlc_update_msgs(&nodes[8], &nodes[7].node.get_our_node_id());
39343970
thrd2.join().unwrap();
39353971

3936-
nodes[5].node.peer_disconnected(nodes[1].node.get_our_node_id());
3937-
nodes[5].node.peer_disconnected(nodes[2].node.get_our_node_id());
3938-
nodes[5].node.peer_disconnected(nodes[3].node.get_our_node_id());
3939-
nodes[5].node.peer_disconnected(nodes[4].node.get_our_node_id());
3940-
3941-
nodes[5].node.handle_update_fulfill_htlc(node_6_id, &first_updates.update_fulfill_htlcs[0]);
3942-
check_added_monitors(&nodes[5], 1);
3943-
expect_payment_forwarded!(nodes[5], nodes[1], nodes[6], Some(1000), false, false);
3944-
nodes[5].node.handle_commitment_signed(node_6_id, &first_updates.commitment_signed);
3945-
check_added_monitors(&nodes[5], 1);
3946-
let (raa, cs) = get_revoke_commit_msgs(&nodes[5], &node_6_id);
3972+
// Disconnect node 7 from all its peers so it doesn't bother to fail the HTLCs back
3973+
nodes[7].node.peer_disconnected(nodes[1].node.get_our_node_id());
3974+
nodes[7].node.peer_disconnected(nodes[2].node.get_our_node_id());
3975+
nodes[7].node.peer_disconnected(nodes[3].node.get_our_node_id());
3976+
nodes[7].node.peer_disconnected(nodes[4].node.get_our_node_id());
3977+
nodes[7].node.peer_disconnected(nodes[5].node.get_our_node_id());
3978+
nodes[7].node.peer_disconnected(nodes[6].node.get_our_node_id());
3979+
3980+
nodes[7].node.handle_update_fulfill_htlc(node_8_id, &first_updates.update_fulfill_htlcs[0]);
3981+
check_added_monitors(&nodes[7], 1);
3982+
expect_payment_forwarded!(nodes[7], nodes[1], nodes[8], Some(1000), false, false);
3983+
nodes[7].node.handle_commitment_signed(node_8_id, &first_updates.commitment_signed);
3984+
check_added_monitors(&nodes[7], 1);
3985+
let (raa, cs) = get_revoke_commit_msgs(&nodes[7], &node_8_id);
39473986

39483987
// Now, handle the `revoke_and_ack` from node 7. Note that `claim_funds` is still blocked on
39493988
// our peer lock, so we have to release a write to let it process.
3989+
// After this call completes, the channel previously would be locked up and should not be able
3990+
// to make further progress.
39503991
let do_a_write_background = do_a_write.clone();
3992+
let block_thrd3 = AtomicBool::new(true);
3993+
let block_thrd3_read: &'static AtomicBool = unsafe { std::mem::transmute(&block_thrd3) };
39513994
let thrd3 = std::thread::spawn(move || {
3995+
while block_thrd3_read.load(Ordering::Acquire) {
3996+
std::thread::yield_now();
3997+
}
3998+
std::thread::sleep(MAX_THREAD_INIT_TIME);
3999+
do_a_write_background.send(()).unwrap();
39524000
std::thread::sleep(MAX_THREAD_INIT_TIME);
39534001
do_a_write_background.send(()).unwrap();
39544002
});
3955-
nodes[6].node.handle_revoke_and_ack(node_5_id, &raa);
4003+
block_thrd3.store(false, Ordering::Release);
4004+
nodes[8].node.handle_revoke_and_ack(node_7_id, &raa);
39564005
thrd3.join().unwrap();
4006+
assert!(!thrd.is_finished());
39574007

39584008
let thrd4 = std::thread::spawn(move || {
3959-
std::thread::sleep(MAX_THREAD_INIT_TIME);
39604009
do_a_write.send(()).unwrap();
39614010
do_a_write.send(()).unwrap();
39624011
});
39634012

39644013
thrd4.join().unwrap();
39654014
thrd.join().unwrap();
39664015

3967-
expect_payment_claimed!(nodes[6], payment_hash, 30_000_000);
4016+
expect_payment_claimed!(nodes[8], payment_hash, 50_000_000);
39684017

3969-
// At the end, we should have 5 ChannelMonitorUpdates - 4 for HTLC claims, and one for the
4018+
// At the end, we should have 7 ChannelMonitorUpdates - 6 for HTLC claims, and one for the
39704019
// above `revoke_and_ack`.
3971-
check_added_monitors(&nodes[6], 5);
3972-
3973-
// Now drive everything to the end, at least as far as node 6 is concerned...
3974-
*nodes[6].chain_monitor.write_blocker.lock().unwrap() = None;
3975-
nodes[6].node.handle_commitment_signed(node_5_id, &cs);
3976-
check_added_monitors(&nodes[6], 1);
3977-
3978-
let (updates, raa) = get_updates_and_revoke(&nodes[6], &nodes[5].node.get_our_node_id());
3979-
nodes[5].node.handle_update_fulfill_htlc(node_6_id, &updates.update_fulfill_htlcs[0]);
3980-
expect_payment_forwarded!(nodes[5], nodes[2], nodes[6], Some(1000), false, false);
3981-
nodes[5].node.handle_update_fulfill_htlc(node_6_id, &updates.update_fulfill_htlcs[1]);
3982-
expect_payment_forwarded!(nodes[5], nodes[3], nodes[6], Some(1000), false, false);
3983-
nodes[5].node.handle_commitment_signed(node_6_id, &updates.commitment_signed);
3984-
nodes[5].node.handle_revoke_and_ack(node_6_id, &raa);
3985-
check_added_monitors(&nodes[5], 4);
3986-
3987-
let (raa, cs) = get_revoke_commit_msgs(&nodes[5], &node_6_id);
3988-
3989-
nodes[6].node.handle_revoke_and_ack(node_5_id, &raa);
3990-
nodes[6].node.handle_commitment_signed(node_5_id, &cs);
3991-
check_added_monitors(&nodes[6], 2);
3992-
3993-
let (updates, raa) = get_updates_and_revoke(&nodes[6], &nodes[5].node.get_our_node_id());
3994-
nodes[5].node.handle_update_fulfill_htlc(node_6_id, &updates.update_fulfill_htlcs[0]);
3995-
expect_payment_forwarded!(nodes[5], nodes[4], nodes[6], Some(1000), false, false);
3996-
nodes[5].node.handle_commitment_signed(node_6_id, &updates.commitment_signed);
3997-
nodes[5].node.handle_revoke_and_ack(node_6_id, &raa);
3998-
check_added_monitors(&nodes[5], 3);
3999-
4000-
let (raa, cs) = get_revoke_commit_msgs(&nodes[5], &node_6_id);
4001-
nodes[6].node.handle_revoke_and_ack(node_5_id, &raa);
4002-
nodes[6].node.handle_commitment_signed(node_5_id, &cs);
4003-
check_added_monitors(&nodes[6], 2);
4004-
4005-
let raa = get_event_msg!(nodes[6], MessageSendEvent::SendRevokeAndACK, node_5_id);
4006-
nodes[5].node.handle_revoke_and_ack(node_6_id, &raa);
4007-
check_added_monitors(&nodes[5], 1);
4020+
check_added_monitors(&nodes[8], 7);
4021+
4022+
// Now drive everything to the end, at least as far as node 8 is concerned...
4023+
*nodes[8].chain_monitor.write_blocker.lock().unwrap() = None;
4024+
nodes[8].node.handle_commitment_signed(node_7_id, &cs);
4025+
check_added_monitors(&nodes[8], 1);
4026+
4027+
let (updates, raa) = get_updates_and_revoke(&nodes[8], &nodes[7].node.get_our_node_id());
4028+
4029+
nodes[7].node.handle_update_fulfill_htlc(node_8_id, &updates.update_fulfill_htlcs[0]);
4030+
expect_payment_forwarded!(nodes[7], nodes[2], nodes[8], Some(1000), false, false);
4031+
nodes[7].node.handle_update_fulfill_htlc(node_8_id, &updates.update_fulfill_htlcs[1]);
4032+
expect_payment_forwarded!(nodes[7], nodes[3], nodes[8], Some(1000), false, false);
4033+
let mut next_source = 4;
4034+
if let Some(update) = updates.update_fulfill_htlcs.get(2) {
4035+
nodes[7].node.handle_update_fulfill_htlc(node_8_id, update);
4036+
expect_payment_forwarded!(nodes[7], nodes[4], nodes[8], Some(1000), false, false);
4037+
next_source += 1;
4038+
}
4039+
4040+
nodes[7].node.handle_commitment_signed(node_8_id, &updates.commitment_signed);
4041+
nodes[7].node.handle_revoke_and_ack(node_8_id, &raa);
4042+
if updates.update_fulfill_htlcs.get(2).is_some() {
4043+
check_added_monitors(&nodes[7], 5);
4044+
} else {
4045+
check_added_monitors(&nodes[7], 4);
4046+
}
4047+
4048+
let (raa, cs) = get_revoke_commit_msgs(&nodes[7], &node_8_id);
4049+
4050+
nodes[8].node.handle_revoke_and_ack(node_7_id, &raa);
4051+
nodes[8].node.handle_commitment_signed(node_7_id, &cs);
4052+
check_added_monitors(&nodes[8], 2);
4053+
4054+
let (updates, raa) = get_updates_and_revoke(&nodes[8], &node_7_id);
4055+
4056+
nodes[7].node.handle_update_fulfill_htlc(node_8_id, &updates.update_fulfill_htlcs[0]);
4057+
expect_payment_forwarded!(nodes[7], nodes[next_source], nodes[8], Some(1000), false, false);
4058+
next_source += 1;
4059+
nodes[7].node.handle_update_fulfill_htlc(node_8_id, &updates.update_fulfill_htlcs[1]);
4060+
expect_payment_forwarded!(nodes[7], nodes[next_source], nodes[8], Some(1000), false, false);
4061+
next_source += 1;
4062+
if let Some(update) = updates.update_fulfill_htlcs.get(2) {
4063+
nodes[7].node.handle_update_fulfill_htlc(node_8_id, update);
4064+
expect_payment_forwarded!(nodes[7], nodes[next_source], nodes[8], Some(1000), false, false);
4065+
}
4066+
4067+
nodes[7].node.handle_commitment_signed(node_8_id, &updates.commitment_signed);
4068+
nodes[7].node.handle_revoke_and_ack(node_8_id, &raa);
4069+
if updates.update_fulfill_htlcs.get(2).is_some() {
4070+
check_added_monitors(&nodes[7], 5);
4071+
} else {
4072+
check_added_monitors(&nodes[7], 4);
4073+
}
4074+
4075+
let (raa, cs) = get_revoke_commit_msgs(&nodes[7], &node_8_id);
4076+
nodes[8].node.handle_revoke_and_ack(node_7_id, &raa);
4077+
nodes[8].node.handle_commitment_signed(node_7_id, &cs);
4078+
check_added_monitors(&nodes[8], 2);
4079+
4080+
let raa = get_event_msg!(nodes[8], MessageSendEvent::SendRevokeAndACK, node_7_id);
4081+
nodes[7].node.handle_revoke_and_ack(node_8_id, &raa);
4082+
check_added_monitors(&nodes[7], 1);
40084083
}

0 commit comments

Comments
 (0)