Skip to content

Commit aeeafed

Browse files
authored
Merge pull request #1376 from jurvis/jurvis/persist-networkgraph
Persist NetworkGraph on removal of stale channels
2 parents 7671ae5 + df2e60d commit aeeafed

File tree

2 files changed

+136
-45
lines changed
  • lightning-background-processor/src
  • lightning-persister/src

2 files changed

+136
-45
lines changed

lightning-background-processor/src/lib.rs

Lines changed: 122 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,13 @@ const PING_TIMER: u64 = 1;
7575
/// Prune the network graph of stale entries hourly.
7676
const NETWORK_PRUNE_TIMER: u64 = 60 * 60;
7777

78-
/// Trait which handles persisting a [`ChannelManager`] to disk.
79-
///
80-
/// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager
81-
pub trait ChannelManagerPersister<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
78+
#[cfg(not(test))]
79+
const FIRST_NETWORK_PRUNE_TIMER: u64 = 60;
80+
#[cfg(test)]
81+
const FIRST_NETWORK_PRUNE_TIMER: u64 = 1;
82+
83+
/// Trait that handles persisting a [`ChannelManager`] and [`NetworkGraph`] to disk.
84+
pub trait Persister<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
8285
where
8386
M::Target: 'static + chain::Watch<Signer>,
8487
T::Target: 'static + BroadcasterInterface,
@@ -87,24 +90,11 @@ where
8790
L::Target: 'static + Logger,
8891
{
8992
/// Persist the given [`ChannelManager`] to disk, returning an error if persistence failed
90-
/// (which will cause the [`BackgroundProcessor`] which called this method to exit.
91-
///
92-
/// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager
93+
/// (which will cause the [`BackgroundProcessor`] which called this method to exit).
9394
fn persist_manager(&self, channel_manager: &ChannelManager<Signer, M, T, K, F, L>) -> Result<(), std::io::Error>;
94-
}
9595

96-
impl<Fun, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
97-
ChannelManagerPersister<Signer, M, T, K, F, L> for Fun where
98-
M::Target: 'static + chain::Watch<Signer>,
99-
T::Target: 'static + BroadcasterInterface,
100-
K::Target: 'static + KeysInterface<Signer = Signer>,
101-
F::Target: 'static + FeeEstimator,
102-
L::Target: 'static + Logger,
103-
Fun: Fn(&ChannelManager<Signer, M, T, K, F, L>) -> Result<(), std::io::Error>,
104-
{
105-
fn persist_manager(&self, channel_manager: &ChannelManager<Signer, M, T, K, F, L>) -> Result<(), std::io::Error> {
106-
self(channel_manager)
107-
}
96+
/// Persist the given [`NetworkGraph`] to disk, returning an error if persistence failed.
97+
fn persist_graph(&self, network_graph: &NetworkGraph) -> Result<(), std::io::Error>;
10898
}
10999

110100
/// Decorates an [`EventHandler`] with common functionality provided by standard [`EventHandler`]s.
@@ -141,17 +131,21 @@ impl BackgroundProcessor {
141131
/// documentation].
142132
///
143133
/// The thread runs indefinitely unless the object is dropped, [`stop`] is called, or
144-
/// `persist_manager` returns an error. In case of an error, the error is retrieved by calling
134+
/// [`Persister::persist_manager`] returns an error. In case of an error, the error is retrieved by calling
145135
/// either [`join`] or [`stop`].
146136
///
147137
/// # Data Persistence
148138
///
149-
/// `persist_manager` is responsible for writing out the [`ChannelManager`] to disk, and/or
139+
/// [`Persister::persist_manager`] is responsible for writing out the [`ChannelManager`] to disk, and/or
150140
/// uploading to one or more backup services. See [`ChannelManager::write`] for writing out a
151141
/// [`ChannelManager`]. See [`FilesystemPersister::persist_manager`] for Rust-Lightning's
152142
/// provided implementation.
153143
///
154-
/// Typically, users should either implement [`ChannelManagerPersister`] to never return an
144+
/// [`Persister::persist_graph`] is responsible for writing out the [`NetworkGraph`] to disk. See
145+
/// [`NetworkGraph::write`] for writing out a [`NetworkGraph`]. See [`FilesystemPersister::persist_network_graph`]
146+
/// for Rust-Lightning's provided implementation.
147+
///
148+
/// Typically, users should either implement [`Persister::persist_manager`] to never return an
155149
/// error or call [`join`] and handle any error that may arise. For the latter case,
156150
/// `BackgroundProcessor` must be restarted by calling `start` again after handling the error.
157151
///
@@ -168,7 +162,9 @@ impl BackgroundProcessor {
168162
/// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager
169163
/// [`ChannelManager::write`]: lightning::ln::channelmanager::ChannelManager#impl-Writeable
170164
/// [`FilesystemPersister::persist_manager`]: lightning_persister::FilesystemPersister::persist_manager
165+
/// [`FilesystemPersister::persist_network_graph`]: lightning_persister::FilesystemPersister::persist_network_graph
171166
/// [`NetworkGraph`]: lightning::routing::network_graph::NetworkGraph
167+
/// [`NetworkGraph::write`]: lightning::routing::network_graph::NetworkGraph#impl-Writeable
172168
pub fn start<
173169
Signer: 'static + Sign,
174170
CA: 'static + Deref + Send + Sync,
@@ -184,14 +180,14 @@ impl BackgroundProcessor {
184180
CMH: 'static + Deref + Send + Sync,
185181
RMH: 'static + Deref + Send + Sync,
186182
EH: 'static + EventHandler + Send,
187-
CMP: 'static + Send + ChannelManagerPersister<Signer, CW, T, K, F, L>,
183+
PS: 'static + Send + Persister<Signer, CW, T, K, F, L>,
188184
M: 'static + Deref<Target = ChainMonitor<Signer, CF, T, F, L, P>> + Send + Sync,
189185
CM: 'static + Deref<Target = ChannelManager<Signer, CW, T, K, F, L>> + Send + Sync,
190186
NG: 'static + Deref<Target = NetGraphMsgHandler<G, CA, L>> + Send + Sync,
191187
UMH: 'static + Deref + Send + Sync,
192188
PM: 'static + Deref<Target = PeerManager<Descriptor, CMH, RMH, L, UMH>> + Send + Sync,
193189
>(
194-
persister: CMP, event_handler: EH, chain_monitor: M, channel_manager: CM,
190+
persister: PS, event_handler: EH, chain_monitor: M, channel_manager: CM,
195191
net_graph_msg_handler: Option<NG>, peer_manager: PM, logger: L
196192
) -> Self
197193
where
@@ -273,19 +269,29 @@ impl BackgroundProcessor {
273269
// falling back to our usual hourly prunes. This avoids short-lived clients never
274270
// pruning their network graph. We run once 60 seconds after startup before
275271
// continuing our normal cadence.
276-
if last_prune_call.elapsed().as_secs() > if have_pruned { NETWORK_PRUNE_TIMER } else { 60 } {
272+
if last_prune_call.elapsed().as_secs() > if have_pruned { NETWORK_PRUNE_TIMER } else { FIRST_NETWORK_PRUNE_TIMER } {
277273
if let Some(ref handler) = net_graph_msg_handler {
278274
log_trace!(logger, "Pruning network graph of stale entries");
279275
handler.network_graph().remove_stale_channels();
276+
if let Err(e) = persister.persist_graph(handler.network_graph()) {
277+
log_error!(logger, "Error: Failed to persist network graph, check your disk and permissions {}", e)
278+
}
280279
last_prune_call = Instant::now();
281280
have_pruned = true;
282281
}
283282
}
284283
}
284+
285285
// After we exit, ensure we persist the ChannelManager one final time - this avoids
286286
// some races where users quit while channel updates were in-flight, with
287287
// ChannelMonitor update(s) persisted without a corresponding ChannelManager update.
288-
persister.persist_manager(&*channel_manager)
288+
persister.persist_manager(&*channel_manager)?;
289+
290+
// Persist NetworkGraph on exit
291+
if let Some(ref handler) = net_graph_msg_handler {
292+
persister.persist_graph(handler.network_graph())?;
293+
}
294+
Ok(())
289295
});
290296
Self { stop_thread: stop_thread_clone, thread_handle: Some(handle) }
291297
}
@@ -343,9 +349,10 @@ mod tests {
343349
use bitcoin::blockdata::constants::genesis_block;
344350
use bitcoin::blockdata::transaction::{Transaction, TxOut};
345351
use bitcoin::network::constants::Network;
346-
use lightning::chain::{BestBlock, Confirm, chainmonitor};
352+
use lightning::chain::chaininterface::{BroadcasterInterface, FeeEstimator};
353+
use lightning::chain::{BestBlock, Confirm, chainmonitor, self};
347354
use lightning::chain::channelmonitor::ANTI_REORG_DELAY;
348-
use lightning::chain::keysinterface::{InMemorySigner, Recipient, KeysInterface, KeysManager};
355+
use lightning::chain::keysinterface::{InMemorySigner, Recipient, KeysInterface, KeysManager, Sign};
349356
use lightning::chain::transaction::OutPoint;
350357
use lightning::get_event_msg;
351358
use lightning::ln::channelmanager::{BREAKDOWN_TIMEOUT, ChainParameters, ChannelManager, SimpleArcChannelManager};
@@ -355,12 +362,14 @@ mod tests {
355362
use lightning::routing::network_graph::{NetworkGraph, NetGraphMsgHandler};
356363
use lightning::util::config::UserConfig;
357364
use lightning::util::events::{Event, MessageSendEventsProvider, MessageSendEvent};
365+
use lightning::util::logger::Logger;
358366
use lightning::util::ser::Writeable;
359367
use lightning::util::test_utils;
360368
use lightning_invoice::payment::{InvoicePayer, RetryAttempts};
361369
use lightning_invoice::utils::DefaultRouter;
362370
use lightning_persister::FilesystemPersister;
363371
use std::fs;
372+
use std::ops::Deref;
364373
use std::path::PathBuf;
365374
use std::sync::{Arc, Mutex};
366375
use std::time::Duration;
@@ -402,6 +411,48 @@ mod tests {
402411
}
403412
}
404413

414+
struct Persister {
415+
data_dir: String,
416+
graph_error: Option<(std::io::ErrorKind, &'static str)>,
417+
manager_error: Option<(std::io::ErrorKind, &'static str)>
418+
}
419+
420+
impl Persister {
421+
fn new(data_dir: String) -> Self {
422+
Self { data_dir, graph_error: None, manager_error: None }
423+
}
424+
425+
fn with_graph_error(self, error: std::io::ErrorKind, message: &'static str) -> Self {
426+
Self { graph_error: Some((error, message)), ..self }
427+
}
428+
429+
fn with_manager_error(self, error: std::io::ErrorKind, message: &'static str) -> Self {
430+
Self { manager_error: Some((error, message)), ..self }
431+
}
432+
}
433+
434+
impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L:Deref> super::Persister<Signer, M, T, K, F, L> for Persister where
435+
M::Target: 'static + chain::Watch<Signer>,
436+
T::Target: 'static + BroadcasterInterface,
437+
K::Target: 'static + KeysInterface<Signer = Signer>,
438+
F::Target: 'static + FeeEstimator,
439+
L::Target: 'static + Logger,
440+
{
441+
fn persist_manager(&self, channel_manager: &ChannelManager<Signer, M, T, K, F, L>) -> Result<(), std::io::Error> {
442+
match self.manager_error {
443+
None => FilesystemPersister::persist_manager(self.data_dir.clone(), channel_manager),
444+
Some((error, message)) => Err(std::io::Error::new(error, message)),
445+
}
446+
}
447+
448+
fn persist_graph(&self, network_graph: &NetworkGraph) -> Result<(), std::io::Error> {
449+
match self.graph_error {
450+
None => FilesystemPersister::persist_network_graph(self.data_dir.clone(), network_graph),
451+
Some((error, message)) => Err(std::io::Error::new(error, message)),
452+
}
453+
}
454+
}
455+
405456
fn get_full_filepath(filepath: String, filename: String) -> String {
406457
let mut path = PathBuf::from(filepath);
407458
path.push(filename);
@@ -525,19 +576,20 @@ mod tests {
525576

526577
// Initiate the background processors to watch each node.
527578
let data_dir = nodes[0].persister.get_data_dir();
528-
let persister = move |node: &ChannelManager<InMemorySigner, Arc<ChainMonitor>, Arc<test_utils::TestBroadcaster>, Arc<KeysManager>, Arc<test_utils::TestFeeEstimator>, Arc<test_utils::TestLogger>>| FilesystemPersister::persist_manager(data_dir.clone(), node);
579+
let persister = Persister::new(data_dir);
529580
let event_handler = |_: &_| {};
530581
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
531582

532583
macro_rules! check_persisted_data {
533-
($node: expr, $filepath: expr, $expected_bytes: expr) => {
584+
($node: expr, $filepath: expr) => {
585+
let mut expected_bytes = Vec::new();
534586
loop {
535-
$expected_bytes.clear();
536-
match $node.write(&mut $expected_bytes) {
587+
expected_bytes.clear();
588+
match $node.write(&mut expected_bytes) {
537589
Ok(()) => {
538590
match std::fs::read($filepath) {
539591
Ok(bytes) => {
540-
if bytes == $expected_bytes {
592+
if bytes == expected_bytes {
541593
break
542594
} else {
543595
continue
@@ -554,8 +606,8 @@ mod tests {
554606

555607
// Check that the initial channel manager data is persisted as expected.
556608
let filepath = get_full_filepath("test_background_processor_persister_0".to_string(), "manager".to_string());
557-
let mut expected_bytes = Vec::new();
558-
check_persisted_data!(nodes[0].node, filepath.clone(), expected_bytes);
609+
check_persisted_data!(nodes[0].node, filepath.clone());
610+
559611
loop {
560612
if !nodes[0].node.get_persistence_condvar_value() { break }
561613
}
@@ -564,12 +616,18 @@ mod tests {
564616
nodes[0].node.force_close_channel(&OutPoint { txid: tx.txid(), index: 0 }.to_channel_id()).unwrap();
565617

566618
// Check that the force-close updates are persisted.
567-
let mut expected_bytes = Vec::new();
568-
check_persisted_data!(nodes[0].node, filepath.clone(), expected_bytes);
619+
check_persisted_data!(nodes[0].node, filepath.clone());
569620
loop {
570621
if !nodes[0].node.get_persistence_condvar_value() { break }
571622
}
572623

624+
// Check network graph is persisted
625+
let filepath = get_full_filepath("test_background_processor_persister_0".to_string(), "network_graph".to_string());
626+
if let Some(ref handler) = nodes[0].net_graph_msg_handler {
627+
let network_graph = handler.network_graph();
628+
check_persisted_data!(network_graph, filepath.clone());
629+
}
630+
573631
assert!(bg_processor.stop().is_ok());
574632
}
575633

@@ -579,7 +637,7 @@ mod tests {
579637
// `FRESHNESS_TIMER`.
580638
let nodes = create_nodes(1, "test_timer_tick_called".to_string());
581639
let data_dir = nodes[0].persister.get_data_dir();
582-
let persister = move |node: &ChannelManager<InMemorySigner, Arc<ChainMonitor>, Arc<test_utils::TestBroadcaster>, Arc<KeysManager>, Arc<test_utils::TestFeeEstimator>, Arc<test_utils::TestLogger>>| FilesystemPersister::persist_manager(data_dir.clone(), node);
640+
let persister = Persister::new(data_dir);
583641
let event_handler = |_: &_| {};
584642
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
585643
loop {
@@ -596,12 +654,13 @@ mod tests {
596654
}
597655

598656
#[test]
599-
fn test_persist_error() {
657+
fn test_channel_manager_persist_error() {
600658
// Test that if we encounter an error during manager persistence, the thread panics.
601659
let nodes = create_nodes(2, "test_persist_error".to_string());
602660
open_channel!(nodes[0], nodes[1], 100000);
603661

604-
let persister = |_: &_| Err(std::io::Error::new(std::io::ErrorKind::Other, "test"));
662+
let data_dir = nodes[0].persister.get_data_dir();
663+
let persister = Persister::new(data_dir).with_manager_error(std::io::ErrorKind::Other, "test");
605664
let event_handler = |_: &_| {};
606665
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
607666
match bg_processor.join() {
@@ -613,19 +672,37 @@ mod tests {
613672
}
614673
}
615674

675+
#[test]
676+
fn test_network_graph_persist_error() {
677+
// Test that if we encounter an error during network graph persistence, an error gets returned.
678+
let nodes = create_nodes(2, "test_persist_network_graph_error".to_string());
679+
let data_dir = nodes[0].persister.get_data_dir();
680+
let persister = Persister::new(data_dir).with_graph_error(std::io::ErrorKind::Other, "test");
681+
let event_handler = |_: &_| {};
682+
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
683+
684+
match bg_processor.stop() {
685+
Ok(_) => panic!("Expected error persisting network graph"),
686+
Err(e) => {
687+
assert_eq!(e.kind(), std::io::ErrorKind::Other);
688+
assert_eq!(e.get_ref().unwrap().to_string(), "test");
689+
},
690+
}
691+
}
692+
616693
#[test]
617694
fn test_background_event_handling() {
618695
let mut nodes = create_nodes(2, "test_background_event_handling".to_string());
619696
let channel_value = 100000;
620697
let data_dir = nodes[0].persister.get_data_dir();
621-
let persister = move |node: &_| FilesystemPersister::persist_manager(data_dir.clone(), node);
698+
let persister = Persister::new(data_dir.clone());
622699

623700
// Set up a background event handler for FundingGenerationReady events.
624701
let (sender, receiver) = std::sync::mpsc::sync_channel(1);
625702
let event_handler = move |event: &Event| {
626703
sender.send(handle_funding_generation_ready!(event, channel_value)).unwrap();
627704
};
628-
let bg_processor = BackgroundProcessor::start(persister.clone(), event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
705+
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
629706

630707
// Open a channel and check that the FundingGenerationReady event was handled.
631708
begin_open_channel!(nodes[0], nodes[1], channel_value);
@@ -649,7 +726,7 @@ mod tests {
649726
// Set up a background event handler for SpendableOutputs events.
650727
let (sender, receiver) = std::sync::mpsc::sync_channel(1);
651728
let event_handler = move |event: &Event| sender.send(event.clone()).unwrap();
652-
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
729+
let bg_processor = BackgroundProcessor::start(Persister::new(data_dir), event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
653730

654731
// Force close the channel and check that the SpendableOutputs event was handled.
655732
nodes[0].node.force_close_channel(&nodes[0].node.list_channels()[0].channel_id).unwrap();
@@ -675,7 +752,7 @@ mod tests {
675752

676753
// Initiate the background processors to watch each node.
677754
let data_dir = nodes[0].persister.get_data_dir();
678-
let persister = move |node: &ChannelManager<InMemorySigner, Arc<ChainMonitor>, Arc<test_utils::TestBroadcaster>, Arc<KeysManager>, Arc<test_utils::TestFeeEstimator>, Arc<test_utils::TestLogger>>| FilesystemPersister::persist_manager(data_dir.clone(), node);
755+
let persister = Persister::new(data_dir);
679756
let scorer = Arc::new(Mutex::new(test_utils::TestScorer::with_penalty(0)));
680757
let router = DefaultRouter::new(Arc::clone(&nodes[0].network_graph), Arc::clone(&nodes[0].logger), random_seed_bytes);
681758
let invoice_payer = Arc::new(InvoicePayer::new(Arc::clone(&nodes[0].node), router, scorer, Arc::clone(&nodes[0].logger), |_: &_| {}, RetryAttempts(2)));

lightning-persister/src/lib.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ extern crate libc;
1616

1717
use bitcoin::hash_types::{BlockHash, Txid};
1818
use bitcoin::hashes::hex::{FromHex, ToHex};
19+
use lightning::routing::network_graph::NetworkGraph;
1920
use crate::util::DiskWriteable;
2021
use lightning::chain;
2122
use lightning::chain::chaininterface::{BroadcasterInterface, FeeEstimator};
@@ -66,6 +67,12 @@ where
6667
}
6768
}
6869

70+
impl DiskWriteable for NetworkGraph {
71+
fn write_to_file(&self, writer: &mut fs::File) -> Result<(), std::io::Error> {
72+
self.write(writer)
73+
}
74+
}
75+
6976
impl FilesystemPersister {
7077
/// Initialize a new FilesystemPersister and set the path to the individual channels'
7178
/// files.
@@ -103,6 +110,13 @@ impl FilesystemPersister {
103110
util::write_to_file(path, "manager".to_string(), manager)
104111
}
105112

113+
/// Write the provided `NetworkGraph` to the path provided at `FilesystemPersister`
114+
/// initialization, within a file called "network_graph"
115+
pub fn persist_network_graph(data_dir: String, network_graph: &NetworkGraph) -> Result<(), std::io::Error> {
116+
let path = PathBuf::from(data_dir);
117+
util::write_to_file(path, "network_graph".to_string(), network_graph)
118+
}
119+
106120
/// Read `ChannelMonitor`s from disk.
107121
pub fn read_channelmonitors<Signer: Sign, K: Deref> (
108122
&self, keys_manager: K

0 commit comments

Comments
 (0)