
Commit 4956ad6

Fail holding-cell AddHTLCs on Channel deser to match disconnection
As Channel::write says in the comment at the top, "we write out as if remove_uncommitted_htlcs_and_mark_paused had just been called" - except that we previously deliberately included holding-cell AddHTLC events in the serialization.

On the flip side, in remove_uncommitted_htlcs_and_mark_paused we remove pending AddHTLC events under the assumption that, if we can't forward something ASAP, it's better to fail it back to the origin than to sit on it for a while. Given there's likely to be just as large a time lag between serialization and deserialization as between a peer disconnecting and reconnecting, there isn't much reason for this difference. Worse, we debug_assert that there are no pending AddHTLC holding-cell events when handling a reconnect, so any test or fuzzer which deserialized a ChannelManager with AddHTLC events would panic.

We resolve this by adding logic to fail any holding-cell AddHTLC events upon deserialization, in part because trying to forward them before we're sure we have an up-to-date chain is somewhat risky - the sender may already have gone to chain while our upstream has not.
1 parent 4e82003 commit 4956ad6
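
In outline, the change works like this: when the holding cell is read back from disk, type-0 (AddHTLC) entries are no longer reconstructed; instead their HTLCSource and PaymentHash are collected and returned alongside the Channel, and the ChannelManager deserializer fails those HTLCs backwards once the manager has been rebuilt. The sketch below illustrates the idea with simplified stand-in types - HoldingCellEntry, read_holding_cell, and the tuple layout are illustrative only, not the actual lightning types or serialization format:

// Simplified sketch of the new deserialization behaviour (stand-in types, not the real API).
struct HtlcSource;
struct PaymentHash([u8; 32]);

enum HoldingCellEntry {
	// AddHTLC is intentionally absent: adds are failed back rather than rebuilt.
	ClaimHtlc { htlc_id: u64 },
	FailHtlc { htlc_id: u64 },
}

// Each serialized entry is modeled as (type byte, source, payment hash, htlc id) for brevity.
fn read_holding_cell(entries: Vec<(u8, HtlcSource, PaymentHash, u64)>)
	-> Result<(Vec<HoldingCellEntry>, Vec<(HtlcSource, PaymentHash)>), ()> {
	let mut kept = Vec::new();
	let mut failed = Vec::new();
	for (ty, source, hash, htlc_id) in entries {
		match ty {
			// Holding-cell adds become "fail backwards" work for the caller.
			0 => failed.push((source, hash)),
			1 => kept.push(HoldingCellEntry::ClaimHtlc { htlc_id }),
			2 => kept.push(HoldingCellEntry::FailHtlc { htlc_id }),
			_ => return Err(()),
		}
	}
	Ok((kept, failed))
}

In the real code, ChannelManager::read walks the failed list after reload and fails each HTLC back to its source, which is what produces the PaymentFailed event the new test asserts on.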

File tree: 3 files changed, +167 -29 lines

lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 139 additions & 2 deletions
@@ -18,15 +18,16 @@ use bitcoin::network::constants::Network;
 use chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdateErr};
 use chain::transaction::OutPoint;
 use chain::Watch;
-use ln::channelmanager::{RAACommitmentOrder, PaymentPreimage, PaymentHash, PaymentSecret, PaymentSendFailure};
+use ln::channelmanager::{ChannelManager, ChannelManagerReadArgs, RAACommitmentOrder, PaymentPreimage, PaymentHash, PaymentSecret, PaymentSendFailure};
 use ln::features::InitFeatures;
 use ln::msgs;
 use ln::msgs::{ChannelMessageHandler, ErrorAction, RoutingMessageHandler};
 use routing::router::get_route;
+use util::config::UserConfig;
 use util::enforcing_trait_impls::EnforcingChannelKeys;
 use util::events::{Event, EventsProvider, MessageSendEvent, MessageSendEventsProvider};
 use util::errors::APIError;
-use util::ser::Readable;
+use util::ser::{Readable, ReadableArgs, Writeable};
 
 use bitcoin::hashes::sha256::Hash as Sha256;
 use bitcoin::hashes::Hash;
@@ -35,6 +36,8 @@ use ln::functional_test_utils::*;
 
 use util::test_utils;
 
+use std::collections::HashMap;
+
 // If persister_fail is true, we have the persister return a PermanentFailure
 // instead of the higher-level ChainMonitor.
 fn do_test_simple_monitor_permanent_update_fail(persister_fail: bool) {
@@ -1809,6 +1812,140 @@ fn monitor_update_claim_fail_no_response() {
 	claim_payment(&nodes[0], &[&nodes[1]], payment_preimage_2, 1_000_000);
 }
 
+#[test]
+fn test_chan_reload_discard_outbound_holding() {
+	// Test that when we reload a ChannelManager from disk we discard (by failing backwards)
+	// outbound HTLCs sitting in the holding cell. We currently assert that there are no holding
+	// cell outbound HTLCs when we reconnect to a peer, so this would otherwise fail a
+	// debug_assertion, but its also good hygiene - if we are sitting on an HTLC when we reload,
+	// its reasonable to assume its been a while, and, short of having some criteria based on the
+	// CLTV value, trying to forward it likely doesn't make sense.
+	// chanmon_fail_consistency found the debug_assertion failure.
+	let chanmon_cfgs = create_chanmon_cfgs(2);
+	let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+	let node_state_0;
+	let new_chain_monitor;
+	let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+	let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+	create_announced_chan_between_nodes(&nodes, 0, 1, InitFeatures::known(), InitFeatures::known()).2;
+	let logger = test_utils::TestLogger::new();
+
+	// Start forwarding a payment, skipping the first RAA so A is in AwaitingRAA
+	let (payment_preimage_1, payment_hash_1) = get_payment_preimage_hash!(nodes[0]);
+	{
+		let net_graph_msg_handler = &nodes[0].net_graph_msg_handler;
+		let route = get_route(&nodes[0].node.get_our_node_id(), &net_graph_msg_handler.network_graph.read().unwrap(), &nodes[1].node.get_our_node_id(), None, &Vec::new(), 1000000, TEST_FINAL_CLTV, &logger).unwrap();
+		nodes[0].node.send_payment(&route, payment_hash_1, &None).unwrap();
+		check_added_monitors!(nodes[0], 1);
+	}
+
+	let mut events = nodes[0].node.get_and_clear_pending_msg_events();
+	assert_eq!(events.len(), 1);
+	let payment_event = SendEvent::from_event(events.pop().unwrap());
+	nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &payment_event.msgs[0]);
+	nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &payment_event.commitment_msg);
+	check_added_monitors!(nodes[1], 1);
+
+	let (bs_revoke_and_ack, bs_commitment_signed) = get_revoke_commit_msgs!(nodes[1], nodes[0].node.get_our_node_id());
+
+	// Now forward a second payment, getting it stuck in A's outbound holding cell.
+	let (_, payment_hash_2) = get_payment_preimage_hash!(nodes[0]);
+	{
+		let net_graph_msg_handler = &nodes[0].net_graph_msg_handler;
+		let route = get_route(&nodes[0].node.get_our_node_id(), &net_graph_msg_handler.network_graph.read().unwrap(), &nodes[1].node.get_our_node_id(), None, &Vec::new(), 1000000, TEST_FINAL_CLTV, &logger).unwrap();
+		nodes[0].node.send_payment(&route, payment_hash_2, &None).unwrap();
+		check_added_monitors!(nodes[0], 0);
+	}
+
+	let node_state = nodes[0].node.encode();
+	let mut chain_monitor_state = test_utils::TestVecWriter(Vec::new());
+	let funding_outpoint = *nodes[0].chain_monitor.chain_monitor.monitors.lock().unwrap().iter().next().unwrap().0;
+	nodes[0].chain_monitor.chain_monitor.monitors.lock().unwrap().iter().next().unwrap().1.serialize_for_disk(&mut chain_monitor_state).unwrap();
+
+	// Now if we pass the RAA back to A it should free the holding cell outbound HTLC.
+	nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_revoke_and_ack);
+	check_added_monitors!(nodes[0], 1);
+	events = nodes[0].node.get_and_clear_pending_msg_events();
+	assert_eq!(events.len(), 1);
+	let payment_event = SendEvent::from_event(events.pop().unwrap());
+	assert_eq!(payment_event.msgs.len(), 1);
+
+	// Reload A's ChannelManager/Monitor and make sure the reload generates a PaymentFailed for the
+	// second payment.
+	let mut chain_monitor = <(BlockHash, ChannelMonitor<EnforcingChannelKeys>)>::read(&mut ::std::io::Cursor::new(chain_monitor_state.0)).unwrap().1;
+	new_chain_monitor = test_utils::TestChainMonitor::new(Some(nodes[0].chain_source), nodes[0].tx_broadcaster.clone(), &nodes[0].logger, &node_cfgs[0].fee_estimator, &chanmon_cfgs[0].persister);
+	nodes[0].chain_monitor = &new_chain_monitor;
+	node_state_0 = {
+		let mut channel_monitors = HashMap::new();
+		channel_monitors.insert(funding_outpoint, &mut chain_monitor);
+		<(BlockHash, ChannelManager<EnforcingChannelKeys, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>)>::read(&mut ::std::io::Cursor::new(node_state), ChannelManagerReadArgs {
+			keys_manager: &nodes[0].keys_manager,
+			fee_estimator: &node_cfgs[0].fee_estimator,
+			chain_monitor: &nodes[0].chain_monitor,
+			logger: &nodes[0].logger,
+			tx_broadcaster: &nodes[0].tx_broadcaster,
+			default_config: UserConfig::default(),
+			channel_monitors,
+		}).unwrap().1
+	};
+	nodes[0].node = &node_state_0;
+	assert!(nodes[0].chain_monitor.watch_channel(funding_outpoint, chain_monitor).is_ok());
+	check_added_monitors!(nodes[0], 1);
+
+	let events = nodes[0].node.get_and_clear_pending_events();
+	assert_eq!(events.len(), 1);
+	match events[0] {
+		Event::PaymentFailed { ref payment_hash, rejected_by_dest, .. } => {
+			assert_eq!(*payment_hash, payment_hash_2);
+			assert!(!rejected_by_dest);
+		},
+		_ => panic!("Unexpected event"),
+	}
+
+	nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false);
+
+	nodes[0].node.peer_connected(&nodes[1].node.get_our_node_id(), &msgs::Init { features: InitFeatures::empty() });
+	nodes[1].node.peer_connected(&nodes[0].node.get_our_node_id(), &msgs::Init { features: InitFeatures::empty() });
+
+	let node_0_reestablish = get_event_msg!(nodes[0], MessageSendEvent::SendChannelReestablish, nodes[1].node.get_our_node_id());
+	let node_1_reestablish = get_event_msg!(nodes[1], MessageSendEvent::SendChannelReestablish, nodes[0].node.get_our_node_id());
+
+	nodes[0].node.handle_channel_reestablish(&nodes[1].node.get_our_node_id(), &node_1_reestablish);
+	nodes[1].node.handle_channel_reestablish(&nodes[0].node.get_our_node_id(), &node_0_reestablish);
+
+	assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+
+	// Make sure nodes[1] rebroadcasts the undelivered messages:
+	let node_1_msgs = nodes[1].node.get_and_clear_pending_msg_events();
+	assert_eq!(node_1_msgs.len(), 2);
+	match node_1_msgs[0] {
+		MessageSendEvent::SendRevokeAndACK { ref node_id, ref msg } => {
+			assert_eq!(*node_id, nodes[0].node.get_our_node_id());
+			assert!(*msg == bs_revoke_and_ack);
+		},
+		_ => panic!(),
+	}
+	match node_1_msgs[1] {
+		MessageSendEvent::UpdateHTLCs { ref node_id, ref updates } => {
+			assert_eq!(*node_id, nodes[0].node.get_our_node_id());
+			assert!(updates.commitment_signed == bs_commitment_signed);
+		},
+		_ => panic!(),
+	}
+
+	nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_revoke_and_ack);
+	check_added_monitors!(nodes[0], 1);
+	nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_commitment_signed);
+	check_added_monitors!(nodes[0], 1);
+
+	nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &get_event_msg!(nodes[0], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id()));
+	check_added_monitors!(nodes[1], 1);
+	expect_pending_htlcs_forwardable!(nodes[1]);
+	expect_payment_received!(nodes[1], payment_hash_1, 1_000_000);
+
+	claim_payment(&nodes[0], &[&nodes[1]], payment_preimage_1, 1_000_000);
+}
+
 // confirm_a_first and restore_b_before_conf are wholly unrelated to earlier bools and
 // restore_b_before_conf has no meaning if !confirm_a_first
 fn do_during_funding_monitor_fail(confirm_a_first: bool, restore_b_before_conf: bool) {

lightning/src/ln/channel.rs

Lines changed: 15 additions & 23 deletions
@@ -4064,8 +4064,8 @@ impl Readable for InboundHTLCRemovalReason {
 
 impl<ChanSigner: ChannelKeys + Writeable> Writeable for Channel<ChanSigner> {
 	fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ::std::io::Error> {
-		// Note that we write out as if remove_uncommitted_htlcs_and_mark_paused had just been
-		// called but include holding cell updates (and obviously we don't modify self).
+		// Note that we write out as if remove_uncommitted_htlcs_and_mark_paused
+		// had just been called.
 
 		writer.write_all(&[SERIALIZATION_VERSION; 1])?;
 		writer.write_all(&[MIN_SERIALIZATION_VERSION; 1])?;
@@ -4156,13 +4156,10 @@ impl<ChanSigner: ChannelKeys + Writeable> Writeable for Channel<ChanSigner> {
 		(self.holding_cell_htlc_updates.len() as u64).write(writer)?;
 		for update in self.holding_cell_htlc_updates.iter() {
 			match update {
-				&HTLCUpdateAwaitingACK::AddHTLC { ref amount_msat, ref cltv_expiry, ref payment_hash, ref source, ref onion_routing_packet } => {
+				&HTLCUpdateAwaitingACK::AddHTLC { ref payment_hash, ref source, .. } => {
 					0u8.write(writer)?;
-					amount_msat.write(writer)?;
-					cltv_expiry.write(writer)?;
-					payment_hash.write(writer)?;
 					source.write(writer)?;
-					onion_routing_packet.write(writer)?;
+					payment_hash.write(writer)?;
 				},
 				&HTLCUpdateAwaitingACK::ClaimHTLC { ref payment_preimage, ref htlc_id } => {
 					1u8.write(writer)?;
@@ -4248,7 +4245,7 @@ impl<ChanSigner: ChannelKeys + Writeable> Writeable for Channel<ChanSigner> {
 	}
 }
 
-impl<ChanSigner: ChannelKeys + Readable> Readable for Channel<ChanSigner> {
+impl<ChanSigner: ChannelKeys + Readable> Readable for (Channel<ChanSigner>, Vec<(HTLCSource, PaymentHash)>) {
 	fn read<R : ::std::io::Read>(reader: &mut R) -> Result<Self, DecodeError> {
 		let _ver: u8 = Readable::read(reader)?;
 		let min_ver: u8 = Readable::read(reader)?;
@@ -4312,27 +4309,22 @@ impl<ChanSigner: ChannelKeys + Readable> Readable for Channel<ChanSigner> {
 			});
 		}
 
+		let mut failed_htlcs: Vec<(HTLCSource, PaymentHash)> = Vec::new();
 		let holding_cell_htlc_update_count: u64 = Readable::read(reader)?;
 		let mut holding_cell_htlc_updates = Vec::with_capacity(cmp::min(holding_cell_htlc_update_count as usize, OUR_MAX_HTLCS as usize*2));
 		for _ in 0..holding_cell_htlc_update_count {
-			holding_cell_htlc_updates.push(match <u8 as Readable>::read(reader)? {
-				0 => HTLCUpdateAwaitingACK::AddHTLC {
-					amount_msat: Readable::read(reader)?,
-					cltv_expiry: Readable::read(reader)?,
-					payment_hash: Readable::read(reader)?,
-					source: Readable::read(reader)?,
-					onion_routing_packet: Readable::read(reader)?,
-				},
-				1 => HTLCUpdateAwaitingACK::ClaimHTLC {
+			match <u8 as Readable>::read(reader)? {
+				0 => failed_htlcs.push((Readable::read(reader)?, Readable::read(reader)?)),
+				1 => holding_cell_htlc_updates.push(HTLCUpdateAwaitingACK::ClaimHTLC {
 					payment_preimage: Readable::read(reader)?,
 					htlc_id: Readable::read(reader)?,
-				},
-				2 => HTLCUpdateAwaitingACK::FailHTLC {
+				}),
+				2 => holding_cell_htlc_updates.push(HTLCUpdateAwaitingACK::FailHTLC {
 					htlc_id: Readable::read(reader)?,
 					err_packet: Readable::read(reader)?,
-				},
+				}),
 				_ => return Err(DecodeError::InvalidValue),
-			});
+			}
 		}
 
 		let resend_order = match <u8 as Readable>::read(reader)? {
@@ -4398,7 +4390,7 @@ impl<ChanSigner: ChannelKeys + Readable> Readable for Channel<ChanSigner> {
 		let counterparty_shutdown_scriptpubkey = Readable::read(reader)?;
 		let commitment_secrets = Readable::read(reader)?;
 
-		Ok(Channel {
+		Ok((Channel {
 			user_id,
 
 			config,
@@ -4472,7 +4464,7 @@ impl<ChanSigner: ChannelKeys + Readable> Readable for Channel<ChanSigner> {
 			commitment_secrets,
 
 			network_sync: UpdateStatus::Fresh,
-		})
+		}, failed_htlcs))
 	}
 }

lightning/src/ln/channelmanager.rs

Lines changed: 13 additions & 4 deletions
@@ -3871,14 +3871,20 @@ impl<'a, ChanSigner: ChannelKeys + Readable, M: Deref, T: Deref, K: Deref, F: De
 		let latest_block_height: u32 = Readable::read(reader)?;
 		let last_block_hash: BlockHash = Readable::read(reader)?;
 
-		let mut failed_htlcs = Vec::new();
+		let mut perm_failed_htlcs = Vec::new();
+		let mut holding_cell_failed_htlcs = Vec::new();
 
 		let channel_count: u64 = Readable::read(reader)?;
 		let mut funding_txo_set = HashSet::with_capacity(cmp::min(channel_count as usize, 128));
 		let mut by_id = HashMap::with_capacity(cmp::min(channel_count as usize, 128));
 		let mut short_to_id = HashMap::with_capacity(cmp::min(channel_count as usize, 128));
 		for _ in 0..channel_count {
-			let mut channel: Channel<ChanSigner> = Readable::read(reader)?;
+			let channel_and_failed_htlcs: (Channel<ChanSigner>, Vec<(HTLCSource, PaymentHash)>) = Readable::read(reader)?;
+			let (mut channel, chan_failed_htlcs) = channel_and_failed_htlcs;
+			for (_, ref payment_hash) in chan_failed_htlcs.iter() {
+				log_trace!(args.logger, "Going to fail HTLC with hash {} which was pending-forwarding when we were serialized.", log_bytes!(&payment_hash.0[..]));
+			}
+			holding_cell_failed_htlcs.push((channel.channel_id(), chan_failed_htlcs));
 			if channel.last_block_connected != Default::default() && channel.last_block_connected != last_block_hash {
 				return Err(DecodeError::InvalidValue);
 			}
@@ -3898,7 +3904,7 @@ impl<'a, ChanSigner: ChannelKeys + Readable, M: Deref, T: Deref, K: Deref, F: De
 					channel.get_latest_monitor_update_id() < monitor.get_latest_update_id() {
 				// But if the channel is behind of the monitor, close the channel:
 				let (_, _, mut new_failed_htlcs) = channel.force_shutdown(true);
-				failed_htlcs.append(&mut new_failed_htlcs);
+				perm_failed_htlcs.append(&mut new_failed_htlcs);
 				monitor.broadcast_latest_holder_commitment_txn(&args.tx_broadcaster, &args.logger);
 			} else {
 				if let Some(short_channel_id) = channel.get_short_channel_id() {
@@ -3993,9 +3999,12 @@ impl<'a, ChanSigner: ChannelKeys + Readable, M: Deref, T: Deref, K: Deref, F: De
 			default_configuration: args.default_config,
 		};
 
-		for htlc_source in failed_htlcs.drain(..) {
+		for htlc_source in perm_failed_htlcs.drain(..) {
 			channel_manager.fail_htlc_backwards_internal(channel_manager.channel_state.lock().unwrap(), htlc_source.0, &htlc_source.1, HTLCFailReason::Reason { failure_code: 0x4000 | 8, data: Vec::new() });
 		}
+		for (chan_id, htlcs) in holding_cell_failed_htlcs.drain(..) {
+			channel_manager.fail_holding_cell_htlcs(htlcs, chan_id);
+		}
 
 		//TODO: Broadcast channel update for closed channels, but only after we've made a
 		//connection or two.

0 commit comments
