Skip to content

Commit 267b9a4

Browse files
committed
ChannelManager support for monitor update failure in one place
1 parent c36d231 commit 267b9a4

File tree

3 files changed

+157
-19
lines changed

3 files changed

+157
-19
lines changed

src/ln/channelmanager.rs

Lines changed: 136 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use secp256k1;
2323
use chain::chaininterface::{BroadcasterInterface,ChainListener,ChainWatchInterface,FeeEstimator};
2424
use chain::transaction::OutPoint;
2525
use ln::channel::{Channel, ChannelError, ChannelKeys};
26-
use ln::channelmonitor::{ManyChannelMonitor, CLTV_CLAIM_BUFFER, HTLC_FAIL_TIMEOUT_BLOCKS};
26+
use ln::channelmonitor::{ChannelMonitorUpdateErr, ManyChannelMonitor, CLTV_CLAIM_BUFFER, HTLC_FAIL_TIMEOUT_BLOCKS};
2727
use ln::router::{Route,RouteHop};
2828
use ln::msgs;
2929
use ln::msgs::{ChannelMessageHandler, HandleError, RAACommitmentOrder};
@@ -586,6 +586,33 @@ impl ChannelManager {
586586
}
587587
}
588588

589+
fn handle_monitor_update_fail(&self, mut channel_state_lock: MutexGuard<ChannelHolder>, channel_id: &[u8; 32], err: ChannelMonitorUpdateErr, reason: RAACommitmentOrder) {
590+
match err {
591+
ChannelMonitorUpdateErr::PermanentFailure => {
592+
let mut chan = {
593+
let channel_state = channel_state_lock.borrow_parts();
594+
let chan = channel_state.by_id.remove(channel_id).expect("monitor_update_failed must be called within the same lock as the channel get!");
595+
if let Some(short_id) = chan.get_short_channel_id() {
596+
channel_state.short_to_id.remove(&short_id);
597+
}
598+
chan
599+
};
600+
mem::drop(channel_state_lock);
601+
self.finish_force_close_channel(chan.force_shutdown());
602+
let mut events = self.pending_events.lock().unwrap();
603+
if let Ok(update) = self.get_channel_update(&chan) {
604+
events.push(events::Event::BroadcastChannelUpdate {
605+
msg: update
606+
});
607+
}
608+
},
609+
ChannelMonitorUpdateErr::TemporaryFailure => {
610+
let channel = channel_state_lock.by_id.get_mut(channel_id).expect("monitor_update_failed must be called within the same lock as the channel get!");
611+
channel.monitor_update_failed(reason);
612+
},
613+
}
614+
}
615+
589616
#[inline]
590617
fn gen_rho_mu_from_shared_secret(shared_secret: &SharedSecret) -> ([u8; 32], [u8; 32]) {
591618
({
@@ -984,6 +1011,11 @@ impl ChannelManager {
9841011
if let Some((err, code, chan_update)) = loop {
9851012
let chan = channel_state.as_mut().unwrap().by_id.get_mut(&forwarding_id).unwrap();
9861013

1014+
// Note that we could technically not return an error yet here and just hope
1015+
// that the connection is reestablished or monitor updated by the time we get
1016+
// around to doing the actual forward, but better to fail early if we can and
1017+
// hopefully an attacker trying to path-trace payments cannot make this occur
1018+
// on a small/per-node/per-channel scale.
9871019
if !chan.is_live() { // channel_disabled
9881020
break Some(("Forwarding channel is not in a ready state.", 0x1000 | 20, Some(self.get_channel_update(chan).unwrap())));
9891021
}
@@ -1027,6 +1059,7 @@ impl ChannelManager {
10271059
}
10281060

10291061
/// only fails if the channel does not yet have an assigned short_id
1062+
/// May be called with channel_state already locked!
10301063
fn get_channel_update(&self, chan: &Channel) -> Result<msgs::ChannelUpdate, HandleError> {
10311064
let short_channel_id = match chan.get_short_channel_id() {
10321065
None => return Err(HandleError{err: "Channel not yet established", action: None}),
@@ -1097,30 +1130,36 @@ impl ChannelManager {
10971130
let onion_packet = ChannelManager::construct_onion_packet(onion_payloads, onion_keys, &payment_hash);
10981131

10991132
let (first_hop_node_id, update_add, commitment_signed) = {
1100-
let mut channel_state_lock = self.channel_state.lock().unwrap();
1101-
let channel_state = channel_state_lock.borrow_parts();
1133+
let mut channel_state = self.channel_state.lock().unwrap();
11021134

11031135
let id = match channel_state.short_to_id.get(&route.hops.first().unwrap().short_channel_id) {
11041136
None => return Err(APIError::ChannelUnavailable{err: "No channel available with first hop!"}),
11051137
Some(id) => id.clone(),
11061138
};
11071139

11081140
let res = {
1109-
let chan = channel_state.by_id.get_mut(&id).unwrap();
1110-
if chan.get_their_node_id() != route.hops.first().unwrap().pubkey {
1111-
return Err(APIError::RouteError{err: "Node ID mismatch on first hop!"});
1112-
}
1113-
if !chan.is_live() {
1114-
return Err(APIError::ChannelUnavailable{err: "Peer for first hop currently disconnected!"});
1115-
}
1116-
match chan.send_htlc_and_commit(htlc_msat, payment_hash.clone(), htlc_cltv, HTLCSource::OutboundRoute {
1117-
route: route.clone(),
1118-
session_priv: session_priv.clone(),
1119-
first_hop_htlc_msat: htlc_msat,
1120-
}, onion_packet).map_err(|he| APIError::ChannelUnavailable{err: he.err})? {
1141+
let res = {
1142+
let chan = channel_state.by_id.get_mut(&id).unwrap();
1143+
if chan.get_their_node_id() != route.hops.first().unwrap().pubkey {
1144+
return Err(APIError::RouteError{err: "Node ID mismatch on first hop!"});
1145+
}
1146+
if chan.is_awaiting_monitor_update() {
1147+
return Err(APIError::MonitorUpdateFailed);
1148+
}
1149+
if !chan.is_live() {
1150+
return Err(APIError::ChannelUnavailable{err: "Peer for first hop currently disconnected!"});
1151+
}
1152+
chan.send_htlc_and_commit(htlc_msat, payment_hash.clone(), htlc_cltv, HTLCSource::OutboundRoute {
1153+
route: route.clone(),
1154+
session_priv: session_priv.clone(),
1155+
first_hop_htlc_msat: htlc_msat,
1156+
}, onion_packet).map_err(|he| APIError::ChannelUnavailable{err: he.err})?
1157+
};
1158+
match res {
11211159
Some((update_add, commitment_signed, chan_monitor)) => {
1122-
if let Err(_e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
1123-
unimplemented!();
1160+
if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
1161+
self.handle_monitor_update_fail(channel_state, &id, e, RAACommitmentOrder::CommitmentFirst);
1162+
return Err(APIError::MonitorUpdateFailed);
11241163
}
11251164
Some((update_add, commitment_signed))
11261165
},
@@ -1540,7 +1579,83 @@ impl ChannelManager {
15401579
/// ChannelMonitorUpdateErr::TemporaryFailure was returned from a channel monitor update
15411580
/// operation.
15421581
pub fn test_restore_channel_monitor(&self) {
1543-
unimplemented!();
1582+
let mut new_events = Vec::new();
1583+
let mut close_results = Vec::new();
1584+
let mut htlc_forwards = Vec::new();
1585+
let mut htlc_failures = Vec::new();
1586+
1587+
{
1588+
let mut channel_lock = self.channel_state.lock().unwrap();
1589+
let channel_state = channel_lock.borrow_parts();
1590+
let short_to_id = channel_state.short_to_id;
1591+
channel_state.by_id.retain(|_, channel| {
1592+
if channel.is_awaiting_monitor_update() {
1593+
let chan_monitor = channel.channel_monitor();
1594+
if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
1595+
match e {
1596+
ChannelMonitorUpdateErr::PermanentFailure => {
1597+
if let Some(short_id) = channel.get_short_channel_id() {
1598+
short_to_id.remove(&short_id);
1599+
}
1600+
close_results.push(channel.force_shutdown());
1601+
if let Ok(update) = self.get_channel_update(&channel) {
1602+
new_events.push(events::Event::BroadcastChannelUpdate {
1603+
msg: update
1604+
});
1605+
}
1606+
false
1607+
},
1608+
ChannelMonitorUpdateErr::TemporaryFailure => true,
1609+
}
1610+
} else {
1611+
let (raa, commitment_update, order, pending_forwards, mut pending_failures) = channel.monitor_updating_restored();
1612+
if !pending_forwards.is_empty() {
1613+
htlc_forwards.push((channel.get_short_channel_id().expect("We can't have pending forwards before funding confirmation"), pending_forwards));
1614+
}
1615+
htlc_failures.append(&mut pending_failures);
1616+
1617+
macro_rules! handle_cs { () => {
1618+
if let Some(update) = commitment_update {
1619+
new_events.push(events::Event::UpdateHTLCs {
1620+
node_id: channel.get_their_node_id(),
1621+
updates: update,
1622+
});
1623+
}
1624+
} }
1625+
macro_rules! handle_raa { () => {
1626+
if let Some(revoke_and_ack) = raa {
1627+
new_events.push(events::Event::SendRevokeAndACK {
1628+
node_id: channel.get_their_node_id(),
1629+
msg: revoke_and_ack,
1630+
});
1631+
}
1632+
} }
1633+
match order {
1634+
RAACommitmentOrder::CommitmentFirst => {
1635+
handle_cs!();
1636+
handle_raa!();
1637+
},
1638+
RAACommitmentOrder::RevokeAndACKFirst => {
1639+
handle_raa!();
1640+
handle_cs!();
1641+
},
1642+
}
1643+
true
1644+
}
1645+
} else { true }
1646+
});
1647+
}
1648+
1649+
for failure in htlc_failures.drain(..) {
1650+
self.fail_htlc_backwards_internal(self.channel_state.lock().unwrap(), failure.0, &failure.1, failure.2);
1651+
}
1652+
self.forward_htlcs(&mut htlc_forwards[..]);
1653+
1654+
for res in close_results.drain(..) {
1655+
self.finish_force_close_channel(res);
1656+
}
1657+
1658+
self.pending_events.lock().unwrap().append(&mut new_events);
15441659
}
15451660

15461661
fn internal_open_channel(&self, their_node_id: &PublicKey, msg: &msgs::OpenChannel) -> Result<msgs::AcceptChannel, MsgHandleErrInternal> {
@@ -2211,6 +2326,9 @@ impl ChannelManager {
22112326
if !chan.is_outbound() {
22122327
return Err(APIError::APIMisuseError{err: "update_fee cannot be sent for an inbound channel"});
22132328
}
2329+
if chan.is_awaiting_monitor_update() {
2330+
return Err(APIError::MonitorUpdateFailed);
2331+
}
22142332
if !chan.is_live() {
22152333
return Err(APIError::ChannelUnavailable{err: "Channel is either not yet fully established or peer is currently disconnected"});
22162334
}

src/ln/channelmonitor.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,22 @@ pub enum ChannelMonitorUpdateErr {
4747
/// submitting new commitment transactions to the remote party.
4848
/// ChannelManager::test_restore_channel_monitor can be used to retry the update(s) and restore
4949
/// the channel to an operational state.
50+
///
51+
/// Note that continuing to operate when no copy of the updated ChannelMonitor could be
52+
/// persisted is unsafe - if you failed to store the update on your own local disk you should
53+
/// instead return PermanentFailure to force closure of the channel ASAP.
54+
///
55+
/// Even when a channel has been "frozen" updates to the ChannelMonitor can continue to occur
56+
/// (eg if an inbound HTLC which we forwarded was claimed upstream resulting in us attempting
57+
/// to claim it on this channel) and those updates must be applied wherever they can be. At
58+
/// least one such updated ChannelMonitor must be persisted otherwise PermanentFailure should
59+
/// be returned to get things on-chain ASAP using only the in-memory copy. Obviously updates to
60+
/// the channel which would invalidate previous ChannelMonitors are not made when a channel has
61+
/// been "frozen".
62+
///
63+
/// Note that even if updates made after TemporaryFailure succeed you must still call
64+
/// test_restore_channel_monitor to ensure you have the latest monitor and re-enable normal
65+
/// channel operation.
5066
TemporaryFailure,
5167
/// Used to indicate no further channel monitor updates will be allowed (eg we've moved on to a
5268
/// different watchtower and cannot update with all watchtowers that were previously informed

src/util/errors.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ pub enum APIError {
3232
ChannelUnavailable {
3333
/// A human-readable error message
3434
err: &'static str
35-
}
35+
},
36+
/// An attempt to call add_update_monitor returned an Err (ie you did this!), causing the
37+
/// attempted action to fail.
38+
MonitorUpdateFailed,
3639
}
3740

3841
impl fmt::Debug for APIError {
@@ -42,6 +45,7 @@ impl fmt::Debug for APIError {
4245
APIError::FeeRateTooHigh {ref err, ref feerate} => write!(f, "{} feerate: {}", err, feerate),
4346
APIError::RouteError {ref err} => f.write_str(err),
4447
APIError::ChannelUnavailable {ref err} => f.write_str(err),
48+
APIError::MonitorUpdateFailed => f.write_str("Client indicated a channel monitor update failed"),
4549
}
4650
}
4751
}

0 commit comments

Comments
 (0)