Commit 73c9ed1

Address most comments.
1 parent 445f21b commit 73c9ed1

File tree: 1 file changed (+43, -28 lines)
  • lightning-background-processor/src


lightning-background-processor/src/lib.rs

Lines changed: 43 additions & 28 deletions
@@ -166,6 +166,12 @@ impl BackgroundProcessor {
 	/// [`FilesystemPersister::persist_network_graph`]: lightning_persister::FilesystemPersister::persist_network_graph
 	/// [`NetworkGraph`]: lightning::routing::network_graph::NetworkGraph
 	/// [`NetworkGraph::write`]: lightning::routing::network_graph::NetworkGraph#impl-Writeable
+	///
+	/// # Graph Sync
+	///
+	/// If a rapid graph sync is meant to run at startup, set `await_graph_sync_completion` to true
+	/// to indicate to [`BackgroundProcessor`] not to prune the [`NetworkGraph`] instance until
+	/// [`graph_sync_complete`] is called.
 	pub fn start<
 		Signer: 'static + Sign,
 		CA: 'static + Deref + Send + Sync,
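
Aside: the new doc comment describes a gate that keeps the background thread from pruning until the rapid sync signals completion. A minimal, std-only sketch of that pattern (illustrative names, not the LDK API) looks like this; the load side matches the Ordering::Acquire used in the diff.

// Std-only sketch of the gating pattern behind `await_graph_sync_completion`:
// a background loop skips pruning while an AtomicBool is set, and a
// `graph_sync_complete`-style call clears it. Names here are illustrative.
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;
use std::time::Duration;

fn main() {
	let awaiting_sync = Arc::new(AtomicBool::new(true));
	let awaiting_sync_clone = Arc::clone(&awaiting_sync);

	let background = thread::spawn(move || {
		for _ in 0..10 {
			thread::sleep(Duration::from_millis(50));
			if awaiting_sync_clone.load(Ordering::Acquire) {
				println!("Not pruning network graph due to pending graph sync");
				continue;
			}
			println!("Pruning network graph of stale entries");
		}
	});

	// Simulate the rapid graph sync finishing, then signal completion.
	thread::sleep(Duration::from_millis(200));
	awaiting_sync.store(false, Ordering::Release);

	background.join().unwrap();
}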
@@ -267,23 +273,26 @@ impl BackgroundProcessor {
 					last_ping_call = Instant::now();
 				}

-				// The network graph must not be pruned while graph sync completion is pending
-				let is_currently_awaiting_graph_sync = is_awaiting_graph_sync_completion_clone.load(Ordering::Acquire);
-				if !is_currently_awaiting_graph_sync {
-					// Note that we want to run a graph prune once not long after startup before
-					// falling back to our usual hourly prunes. This avoids short-lived clients never
-					// pruning their network graph. We run once 60 seconds after startup before
-					// continuing our normal cadence.
-					if last_prune_call.elapsed().as_secs() > if have_pruned { NETWORK_PRUNE_TIMER } else { FIRST_NETWORK_PRUNE_TIMER } {
-						if let Some(ref handler) = net_graph_msg_handler {
-							log_trace!(logger, "Pruning network graph of stale entries");
-							handler.network_graph().remove_stale_channels();
-							if let Err(e) = persister.persist_graph(handler.network_graph()) {
-								log_error!(logger, "Error: Failed to persist network graph, check your disk and permissions {}", e)
-							}
-							last_prune_call = Instant::now();
-							have_pruned = true;
+				// Note that we want to run a graph prune once not long after startup before
+				// falling back to our usual hourly prunes. This avoids short-lived clients never
+				// pruning their network graph. We run once 60 seconds after startup before
+				// continuing our normal cadence.
+				if last_prune_call.elapsed().as_secs() > if have_pruned { NETWORK_PRUNE_TIMER } else { FIRST_NETWORK_PRUNE_TIMER } {
+					if let Some(ref handler) = net_graph_msg_handler {
+						log_trace!(logger, "Assessing prunability of network graph");
+						// The network graph must not be pruned while graph sync completion is pending
+						let is_currently_awaiting_graph_sync = is_awaiting_graph_sync_completion_clone.load(Ordering::Acquire);
+						if is_currently_awaiting_graph_sync {
+							log_trace!(logger, "Not pruning network graph due to pending graph sync");
+							continue;
+						}
+						log_trace!(logger, "Pruning network graph of stale entries");
+						handler.network_graph().remove_stale_channels();
+						if let Err(e) = persister.persist_graph(handler.network_graph()) {
+							log_error!(logger, "Error: Failed to persist network graph, check your disk and permissions {}", e)
 						}
+						last_prune_call = Instant::now();
+						have_pruned = true;
 					}
 				}
 			}
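
Aside: the prune scheduling above keys off two constants, a short first-prune delay and a longer steady-state interval. A self-contained sketch of that two-phase timer check follows (the constants are placeholders consistent with the comments; the real values live in lightning-background-processor).

// Illustrative sketch of the two-phase prune cadence: one early prune shortly
// after startup, then a slower steady-state interval.
use std::time::Instant;

const FIRST_NETWORK_PRUNE_TIMER: u64 = 60;   // assumed: first prune ~60s after startup
const NETWORK_PRUNE_TIMER: u64 = 60 * 60;    // assumed: hourly thereafter

fn should_prune(last_prune_call: Instant, have_pruned: bool) -> bool {
	let timer = if have_pruned { NETWORK_PRUNE_TIMER } else { FIRST_NETWORK_PRUNE_TIMER };
	last_prune_call.elapsed().as_secs() > timer
}

fn main() {
	let last_prune_call = Instant::now();
	// Immediately after startup neither timer has elapsed, so no prune yet.
	assert!(!should_prune(last_prune_call, false));
	println!("prune now? {}", should_prune(last_prune_call, false));
}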
@@ -330,7 +339,8 @@ impl BackgroundProcessor {
 		self.stop_and_join_thread()
 	}

-	/// Signal to `BackgroundProcessor` that graph sync has completed
+	/// Signal to [`BackgroundProcessor`] that the initial rapid graph sync has completed.
+	///
 	/// This function can only be called usefully once, so there is an implicit understanding
 	/// that running graph sync multiple times after startup is API misuse.
 	pub fn graph_sync_complete(&self) {
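
Aside: the doc comment says calling graph sync completion more than once is not useful. One way to model such a one-shot signal (purely illustrative; the actual implementation may differ) is an atomic swap, so repeat calls become harmless no-ops.

// Hedged sketch of a "call once" completion signal like `graph_sync_complete`.
use std::sync::atomic::{AtomicBool, Ordering};

struct SyncGate {
	awaiting_graph_sync_completion: AtomicBool,
}

impl SyncGate {
	fn new() -> Self {
		Self { awaiting_graph_sync_completion: AtomicBool::new(true) }
	}

	/// Returns true on the first (useful) call, false on any later call.
	fn graph_sync_complete(&self) -> bool {
		self.awaiting_graph_sync_completion.swap(false, Ordering::AcqRel)
	}
}

fn main() {
	let gate = SyncGate::new();
	assert!(gate.graph_sync_complete());   // first call flips the flag
	assert!(!gate.graph_sync_complete());  // subsequent calls are no-ops
}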
@@ -448,7 +458,6 @@ mod tests {
 		fn with_manager_error(self, error: std::io::ErrorKind, message: &'static str) -> Self {
			Self { manager_error: Some((error, message)), ..self }
 		}
-
 	}

 	impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L:Deref> super::Persister<Signer, M, T, K, F, L> for Persister where
@@ -467,7 +476,7 @@ mod tests {

 		fn persist_graph(&self, network_graph: &NetworkGraph) -> Result<(), std::io::Error> {
			if let Some(sender) = &self.graph_sender {
-				sender.send(network_graph.clone());
+				sender.send(network_graph.clone()).unwrap();
			};

			match self.graph_error {
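
Aside: the added `.unwrap()` matters because `SyncSender::send` returns a `Result` that is an `Err` once the receiving side has been dropped; in test code, unwrapping turns a silently ignored send failure into a visible panic. A tiny std-only illustration:

use std::sync::mpsc::sync_channel;

fn main() {
	let (sender, receiver) = sync_channel::<u32>(1);
	sender.send(1).unwrap();                 // receiver alive: Ok(())
	assert_eq!(receiver.recv().unwrap(), 1);

	drop(receiver);
	assert!(sender.send(2).is_err());        // receiver gone: send fails
}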
@@ -770,29 +779,35 @@ mod tests {

 	#[test]
 	fn test_not_pruning_network_graph_until_graph_sync_completion() {
-		let mut nodes = create_nodes(2, "test_not_pruning_network_graph_until_graph_sync_completion".to_string());
-		let channel_value = 100000;
+		let nodes = create_nodes(2, "test_not_pruning_network_graph_until_graph_sync_completion".to_string());
 		let data_dir = nodes[0].persister.get_data_dir();
 		let (sender, receiver) = std::sync::mpsc::sync_channel(1);
 		let persister = Persister::new(data_dir.clone()).with_graph_sender(sender);
 		let network_graph = nodes[0].network_graph.clone();
 		let features = ChannelFeatures::empty();
-		network_graph.update_channel_from_partial_announcement(42, 53, features, nodes[0].node.get_our_node_id(), nodes[1].node.get_our_node_id());
+		network_graph.update_channel_from_partial_announcement(42, 53, features, nodes[0].node.get_our_node_id(), nodes[1].node.get_our_node_id())
+			.expect("Failed to update channel from partial announcement");
 		let original_graph_description = network_graph.to_string();
 		assert!(original_graph_description.contains("42: features: 0000, node_one:"));

 		let event_handler = |_: &_| {};
 		let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), true, nodes[0].logger.clone());

-		let reception_result = receiver
-			.recv_timeout(Duration::from_secs(EVENT_DEADLINE));
-		assert!(reception_result.is_err());
+		loop {
+			let log_entries = nodes[0].logger.lines.lock().unwrap();
+			let expected_log_a = "Assessing prunability of network graph".to_string();
+			let expected_log_b = "Not pruning network graph due to pending graph sync".to_string();
+			if log_entries.get(&("lightning_background_processor".to_string(), expected_log_a)).is_some() &&
+				log_entries.get(&("lightning_background_processor".to_string(), expected_log_b)).is_some() {
+				break
+			}
+		}

 		bg_processor.graph_sync_complete();

-		let graph = receiver
-			.recv_timeout(Duration::from_secs(EVENT_DEADLINE))
-			.expect("SpendableOutputs not handled within deadline");
+		let _ = receiver
+			.recv_timeout(Duration::from_secs(super::FIRST_NETWORK_PRUNE_TIMER * 2))
+			.expect("Network graph not pruned within deadline");
 		let current_graph_description = network_graph.to_string();
 		assert_ne!(current_graph_description, original_graph_description);
 		assert_eq!(current_graph_description.len(), 31);
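
Aside: the updated test waits on the channel for roughly twice the first prune interval after signalling completion, giving the background thread time to prune and persist. A compact sketch of that wait pattern (values are illustrative):

use std::sync::mpsc::sync_channel;
use std::thread;
use std::time::Duration;

fn main() {
	const FIRST_NETWORK_PRUNE_TIMER: u64 = 1; // assumed small value to keep the demo quick

	let (sender, receiver) = sync_channel::<&'static str>(1);
	thread::spawn(move || {
		thread::sleep(Duration::from_millis(100)); // stand-in for the prune happening
		sender.send("pruned graph").unwrap();
	});

	let pruned = receiver
		.recv_timeout(Duration::from_secs(FIRST_NETWORK_PRUNE_TIMER * 2))
		.expect("Network graph not pruned within deadline");
	println!("received: {}", pruned);
}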
