
Commit d7e60fa

ChannelManager support for monitor update failure in one place

1 parent 124d0d3 · commit d7e60fa

3 files changed: +154, -18 lines

src/ln/channelmanager.rs

Lines changed: 133 additions & 17 deletions
@@ -23,7 +23,7 @@ use secp256k1;
 use chain::chaininterface::{BroadcasterInterface,ChainListener,ChainWatchInterface,FeeEstimator};
 use chain::transaction::OutPoint;
 use ln::channel::{Channel, ChannelError, ChannelKeys};
-use ln::channelmonitor::ManyChannelMonitor;
+use ln::channelmonitor::{ChannelMonitorUpdateErr, ManyChannelMonitor};
 use ln::router::{Route,RouteHop};
 use ln::msgs;
 use ln::msgs::{ChannelMessageHandler, HandleError, RAACommitmentOrder};
@@ -562,6 +562,33 @@ impl ChannelManager {
 		}
 	}
 
+	fn handle_monitor_update_fail(&self, mut channel_state_lock: MutexGuard<ChannelHolder>, channel_id: &[u8; 32], err: ChannelMonitorUpdateErr, reason: RAACommitmentOrder) {
+		match err {
+			ChannelMonitorUpdateErr::PermanentFailure => {
+				let mut chan = {
+					let channel_state = channel_state_lock.borrow_parts();
+					let chan = channel_state.by_id.remove(channel_id).expect("monitor_update_failed must be called within the same lock as the channel get!");
+					if let Some(short_id) = chan.get_short_channel_id() {
+						channel_state.short_to_id.remove(&short_id);
+					}
+					chan
+				};
+				mem::drop(channel_state_lock);
+				self.finish_force_close_channel(chan.force_shutdown());
+				let mut events = self.pending_events.lock().unwrap();
+				if let Ok(update) = self.get_channel_update(&chan) {
+					events.push(events::Event::BroadcastChannelUpdate {
+						msg: update
+					});
+				}
+			},
+			ChannelMonitorUpdateErr::TemporaryFailure => {
+				let channel = channel_state_lock.by_id.get_mut(channel_id).expect("monitor_update_failed must be called within the same lock as the channel get!");
+				channel.monitor_update_failed(reason);
+			},
+		}
+	}
+
 	#[inline]
 	fn gen_rho_mu_from_shared_secret(shared_secret: &SharedSecret) -> ([u8; 32], [u8; 32]) {
 		({
@@ -955,6 +982,11 @@ impl ChannelManager {
 				};
 				if let Some((err, code, chan_update)) = {
 					let chan = channel_state.as_mut().unwrap().by_id.get_mut(&forwarding_id).unwrap();
+					// Note that we could technically not return an error yet here and just hope
+					// that the connection is reestablished or monitor updated by the time we get
+					// around to doing the actual forward, but better to fail early if we can and
+					// hopefully an attacker trying to path-trace payments cannot make this occur
+					// on a small/per-node/per-channel scale.
 					if !chan.is_live() {
 						Some(("Forwarding channel is not in a ready state.", 0x1000 | 7, self.get_channel_update(chan).unwrap()))
 					} else {
@@ -979,6 +1011,7 @@ impl ChannelManager {
 	}
 
 	/// only fails if the channel does not yet have an assigned short_id
+	/// May be called with channel_state already locked!
 	fn get_channel_update(&self, chan: &Channel) -> Result<msgs::ChannelUpdate, HandleError> {
 		let short_channel_id = match chan.get_short_channel_id() {
 			None => return Err(HandleError{err: "Channel not yet established", action: None}),
@@ -1049,29 +1082,35 @@ impl ChannelManager {
 		let onion_packet = ChannelManager::construct_onion_packet(onion_payloads, onion_keys, &payment_hash);
 
 		let (first_hop_node_id, update_add, commitment_signed) = {
-			let mut channel_state_lock = self.channel_state.lock().unwrap();
-			let channel_state = channel_state_lock.borrow_parts();
+			let mut channel_state = self.channel_state.lock().unwrap();
 
 			let id = match channel_state.short_to_id.get(&route.hops.first().unwrap().short_channel_id) {
 				None => return Err(APIError::ChannelUnavailable{err: "No channel available with first hop!"}),
 				Some(id) => id.clone(),
 			};
 
 			let res = {
-				let chan = channel_state.by_id.get_mut(&id).unwrap();
-				if chan.get_their_node_id() != route.hops.first().unwrap().pubkey {
-					return Err(APIError::RouteError{err: "Node ID mismatch on first hop!"});
-				}
-				if !chan.is_live() {
-					return Err(APIError::ChannelUnavailable{err: "Peer for first hop currently disconnected!"});
-				}
-				match chan.send_htlc_and_commit(htlc_msat, payment_hash.clone(), htlc_cltv, HTLCSource::OutboundRoute {
-					route: route.clone(),
-					session_priv: session_priv.clone(),
-				}, onion_packet).map_err(|he| APIError::ChannelUnavailable{err: he.err})? {
+				let res = {
+					let chan = channel_state.by_id.get_mut(&id).unwrap();
+					if chan.get_their_node_id() != route.hops.first().unwrap().pubkey {
+						return Err(APIError::RouteError{err: "Node ID mismatch on first hop!"});
+					}
+					if chan.is_awaiting_monitor_update() {
+						return Err(APIError::MonitorUpdateFailed);
+					}
+					if !chan.is_live() {
+						return Err(APIError::ChannelUnavailable{err: "Peer for first hop currently disconnected!"});
+					}
+					chan.send_htlc_and_commit(htlc_msat, payment_hash.clone(), htlc_cltv, HTLCSource::OutboundRoute {
+						route: route.clone(),
+						session_priv: session_priv.clone(),
+					}, onion_packet).map_err(|he| APIError::ChannelUnavailable{err: he.err})?
+				};
+				match res {
 					Some((update_add, commitment_signed, chan_monitor)) => {
-						if let Err(_e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
-							unimplemented!();
+						if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
+							self.handle_monitor_update_fail(channel_state, &id, e, RAACommitmentOrder::CommitmentFirst);
+							return Err(APIError::MonitorUpdateFailed);
 						}
 						Some((update_add, commitment_signed))
 					},
@@ -1479,7 +1518,81 @@ impl ChannelManager {
 	/// ChannelMonitorUpdateErr::TemporaryFailure was returned from a channel monitor update
 	/// operation.
 	pub fn test_restore_channel_monitor(&self) {
-		unimplemented!();
+		let mut new_events = Vec::new();
+		let mut close_results = Vec::new();
+		let mut htlc_forwards = Vec::new();
+		let mut htlc_failures = Vec::new();
+
+		let mut channel_lock = self.channel_state.lock().unwrap();
+		let channel_state = channel_lock.borrow_parts();
+		let short_to_id = channel_state.short_to_id;
+		channel_state.by_id.retain(|_, channel| {
+			if channel.is_awaiting_monitor_update() {
+				let chan_monitor = channel.channel_monitor();
+				if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
+					match e {
+						ChannelMonitorUpdateErr::PermanentFailure => {
+							if let Some(short_id) = channel.get_short_channel_id() {
+								short_to_id.remove(&short_id);
+							}
+							close_results.push(channel.force_shutdown());
+							if let Ok(update) = self.get_channel_update(&channel) {
+								new_events.push(events::Event::BroadcastChannelUpdate {
+									msg: update
+								});
+							}
+							false
+						},
+						ChannelMonitorUpdateErr::TemporaryFailure => true,
+					}
+				} else {
+					let (raa, commitment_update, order, pending_forwards, mut pending_failures) = channel.monitor_updating_restored();
+					if !pending_forwards.is_empty() {
+						htlc_forwards.push((channel.get_short_channel_id().expect("We can't have pending forwards before funding confirmation"), pending_forwards));
+					}
+					htlc_failures.append(&mut pending_failures);
+
+					macro_rules! handle_cs { () => {
+						if let Some(update) = commitment_update {
+							new_events.push(events::Event::UpdateHTLCs {
+								node_id: channel.get_their_node_id(),
+								updates: update,
+							});
+						}
+					} }
+					macro_rules! handle_raa { () => {
+						if let Some(revoke_and_ack) = raa {
+							new_events.push(events::Event::SendRevokeAndACK {
+								node_id: channel.get_their_node_id(),
+								msg: revoke_and_ack,
+							});
+						}
+					} }
+					match order {
+						RAACommitmentOrder::CommitmentFirst => {
+							handle_cs!();
+							handle_raa!();
+						},
+						RAACommitmentOrder::RevokeAndACKFirst => {
+							handle_raa!();
+							handle_cs!();
+						},
+					}
+					true
+				}
+			} else { true }
+		});
+
+		for failure in htlc_failures.drain(..) {
+			self.fail_htlc_backwards_internal(self.channel_state.lock().unwrap(), failure.0, &failure.1, failure.2);
+		}
+		self.forward_htlcs(&mut htlc_forwards[..]);
+
+		for res in close_results.drain(..) {
+			self.finish_force_close_channel(res);
+		}
+
+		self.pending_events.lock().unwrap().append(&mut new_events);
 	}
 
 	fn internal_open_channel(&self, their_node_id: &PublicKey, msg: &msgs::OpenChannel) -> Result<msgs::AcceptChannel, MsgHandleErrInternal> {
@@ -2010,6 +2123,9 @@ impl ChannelManager {
 			if !chan.is_outbound() {
 				return Err(APIError::APIMisuseError{err: "update_fee cannot be sent for an inbound channel"});
 			}
+			if chan.is_awaiting_monitor_update() {
+				return Err(APIError::MonitorUpdateFailed);
+			}
 			if !chan.is_live() {
 				return Err(APIError::ChannelUnavailable{err: "Channel is either not yet fully established or peer is currently disconnected"});
 			}
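
Taken together, the channelmanager.rs changes mean a failed monitor update no longer hits unimplemented!(): a TemporaryFailure freezes the channel, API calls such as send_payment and update_fee return APIError::MonitorUpdateFailed while it is frozen, and test_restore_channel_monitor later retries the queued update(s) and thaws it. A minimal caller-side sketch of that recovery flow follows; the send_payment signature is an assumption and persisting the monitor copy is left as a comment (only test_restore_channel_monitor and APIError::MonitorUpdateFailed come from this commit):

    // Caller-side sketch under the assumptions noted above.
    use ln::channelmanager::ChannelManager;
    use ln::router::Route;
    use util::errors::APIError;

    fn send_with_monitor_retry(manager: &ChannelManager, route: Route, payment_hash: [u8; 32]) {
        // Assumed signature: send_payment(&self, route, payment_hash) -> Result<(), APIError>.
        if let Err(APIError::MonitorUpdateFailed) = manager.send_payment(route, payment_hash) {
            // The channel is now frozen and the HTLC was NOT sent.
            // ... persist the updated ChannelMonitor copies somewhere durable here ...
            // Then retry the queued monitor update(s) and re-enable normal channel operation:
            manager.test_restore_channel_monitor();
            // The payment itself must still be retried by the caller if desired.
        }
    }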

src/ln/channelmonitor.rs

Lines changed: 16 additions & 0 deletions
@@ -47,6 +47,22 @@ pub enum ChannelMonitorUpdateErr {
 	/// submitting new commitment transactions to the remote party.
 	/// ChannelManager::test_restore_channel_monitor can be used to retry the update(s) and restore
 	/// the channel to an operational state.
+	///
+	/// Note that continuing to operate when no copy of the updated ChannelMonitor could be
+	/// persisted is unsafe - if you failed to store the update on your own local disk you should
+	/// instead return PermanentFailure to force closure of the channel ASAP.
+	///
+	/// Even when a channel has been "frozen" updates to the ChannelMonitor can continue to occur
+	/// (eg if an inbound HTLC which we forwarded was claimed upstream resulting in us attempting
+	/// to claim it on this channel) and those updates must be applied wherever they can be. At
+	/// least one such updated ChannelMonitor must be persisted otherwise PermanentFailure should
+	/// be returned to get things on-chain ASAP using only the in-memory copy. Obviously updates to
+	/// the channel which would invalidate previous ChannelMonitors are not made when a channel has
+	/// been "frozen".
+	///
+	/// Note that even if updates made after TemporaryFailure succeed you must still call
+	/// test_restore_channel_monitor to ensure you have the latest monitor and re-enable normal
+	/// channel operation.
 	TemporaryFailure,
 	/// Used to indicate no further channel monitor updates will be allowed (eg we've moved on to a
 	/// different watchtower and cannot update with all watchtowers that were previously informed
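
The expanded TemporaryFailure documentation above spells out the contract for ManyChannelMonitor implementors: keep applying updates (at least in memory) even while a channel is frozen, and return PermanentFailure when no updated copy can be persisted at all. Below is a rough, hypothetical sketch of that decision, assuming add_update_monitor has the shape implied by the calls in this commit (an OutPoint plus a ChannelMonitor in, ChannelMonitorUpdateErr out); the DiskBackedMonitor struct and its persist_to_disk helper are illustrative only:

    // Hypothetical ManyChannelMonitor implementation illustrating the
    // TemporaryFailure / PermanentFailure contract described above.
    use std::collections::HashMap;
    use std::io;
    use std::sync::Mutex;

    use chain::transaction::OutPoint;
    use ln::channelmonitor::{ChannelMonitor, ChannelMonitorUpdateErr, ManyChannelMonitor};

    struct DiskBackedMonitor {
        // Latest monitors, kept even when the disk write fails so we can still
        // go on-chain with the in-memory copy if forced to.
        in_memory: Mutex<HashMap<OutPoint, ChannelMonitor>>,
    }

    impl DiskBackedMonitor {
        // Hypothetical helper: serialize and durably store the monitor for funding_txo.
        fn persist_to_disk(&self, _funding_txo: &OutPoint) -> Result<(), io::Error> {
            unimplemented!()
        }
    }

    impl ManyChannelMonitor for DiskBackedMonitor {
        fn add_update_monitor(&self, funding_txo: OutPoint, monitor: ChannelMonitor) -> Result<(), ChannelMonitorUpdateErr> {
            // Always apply the update in memory first, per the docs above.
            self.in_memory.lock().unwrap().insert(funding_txo, monitor);
            match self.persist_to_disk(&funding_txo) {
                Ok(()) => Ok(()),
                // How failures are classified is application policy; a retriable
                // local error freezes the channel until test_restore_channel_monitor.
                Err(ref e) if e.kind() == io::ErrorKind::WouldBlock =>
                    Err(ChannelMonitorUpdateErr::TemporaryFailure),
                // No durable copy exists at all: force the channel on-chain ASAP.
                Err(_) => Err(ChannelMonitorUpdateErr::PermanentFailure),
            }
        }
    }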

src/util/errors.rs

Lines changed: 5 additions & 1 deletion
@@ -32,7 +32,10 @@ pub enum APIError {
 	ChannelUnavailable {
 		/// A human-readable error message
 		err: &'static str
-	}
+	},
+	/// An attempt to call add_update_monitor returned an Err (ie you did this!), causing the
+	/// attempted action to fail.
+	MonitorUpdateFailed,
 }
 
 impl fmt::Debug for APIError {
@@ -42,6 +45,7 @@ impl fmt::Debug for APIError {
 			APIError::FeeRateTooHigh {ref err, ref feerate} => write!(f, "{} feerate: {}", err, feerate),
 			APIError::RouteError {ref err} => f.write_str(err),
 			APIError::ChannelUnavailable {ref err} => f.write_str(err),
+			APIError::MonitorUpdateFailed => f.write_str("Client indicated a channel monitor update failed"),
 		}
 	}
 }
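
The new variant lets API callers tell a frozen-channel condition apart from routing or availability errors. A short sketch of matching on the variants listed in this file, assuming that list is complete for this version of the crate:

    // Sketch: classify APIError values returned by ChannelManager calls.
    use util::errors::APIError;

    fn describe(e: &APIError) -> String {
        match *e {
            APIError::APIMisuseError { ref err } => format!("API misuse: {}", err),
            APIError::FeeRateTooHigh { ref err, ref feerate } => format!("{} feerate: {}", err, feerate),
            APIError::RouteError { ref err } => format!("route error: {}", err),
            APIError::ChannelUnavailable { ref err } => format!("channel unavailable: {}", err),
            // New in this commit: the action was aborted because add_update_monitor
            // returned an Err; the channel stays frozen until a later
            // test_restore_channel_monitor call succeeds.
            APIError::MonitorUpdateFailed => "channel monitor update failed".to_string(),
        }
    }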
