
Commit c84a274

Refuse recursive read locks in lockorder testing
Our existing lockorder tests assume that taking a read lock on a thread which is already holding the same read lock is totally fine. This isn't at all true. The `std` `RwLock` behavior is platform-dependent: on most platforms readers can starve writers, as readers never block for a pending writer. However, on platforms where this is not the case, one thread trying to take a write lock may deadlock with another thread that already holds, and is attempting to re-take, the same read lock. Worse, our in-tree `FairRwLock` exhibits this behavior explicitly on all platforms to avoid the starvation issue.

Thus, we shouldn't have any special handling allowing recursive read locks, so we simply remove it here.
1 parent 9221d47 commit c84a274
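
To make the hazard concrete, here is a minimal sketch (illustrative only, not part of this commit) of the interleaving the message describes. On a platform whose `RwLock` parks new readers behind a queued writer (as the in-tree `FairRwLock` does deliberately), this program can hang:

use std::sync::{Arc, RwLock};
use std::thread;
use std::time::Duration;

fn main() {
    let lock = Arc::new(RwLock::new(0u32));

    let reader = {
        let lock = Arc::clone(&lock);
        thread::spawn(move || {
            let _first = lock.read().unwrap(); // hold a read lock...
            thread::sleep(Duration::from_millis(50));
            // ...then take it again. If a writer queued up in the meantime
            // and the platform blocks new readers behind pending writers,
            // this waits on the writer, which waits on `_first`: deadlock.
            let _second = lock.read().unwrap();
        })
    };

    let writer = {
        let lock = Arc::clone(&lock);
        thread::spawn(move || {
            thread::sleep(Duration::from_millis(10));
            let _w = lock.write().unwrap(); // queued behind `_first`
        })
    };

    reader.join().unwrap();
    writer.join().unwrap();
}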

File tree

6 files changed, +46 −73 lines


lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 3 additions & 1 deletion

@@ -1426,9 +1426,11 @@ fn monitor_failed_no_reestablish_response() {
 	{
 		let mut node_0_per_peer_lock;
 		let mut node_0_peer_state_lock;
+		get_channel_ref!(nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, channel_id).announcement_sigs_state = AnnouncementSigsState::PeerReceived;
+	}
+	{
 		let mut node_1_per_peer_lock;
 		let mut node_1_peer_state_lock;
-		get_channel_ref!(nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, channel_id).announcement_sigs_state = AnnouncementSigsState::PeerReceived;
 		get_channel_ref!(nodes[1], nodes[0], node_1_per_peer_lock, node_1_peer_state_lock, channel_id).announcement_sigs_state = AnnouncementSigsState::PeerReceived;
 	}
 
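The restructuring above (and in the payment tests below) follows one pattern: scope each lock guard so it drops before the next read lock is taken, rather than holding several read guards at once. A minimal sketch of the pattern (hypothetical lock, not from the diff):

fn scoped_reads(lock: &std::sync::RwLock<u32>) {
    // Before: a second read of `lock` while an earlier guard was alive was
    // tolerated by the lockorder checker; after this commit it is refused.
    {
        let _outer = lock.read().unwrap();
        // ... use _outer ...
    } // _outer dropped here
    let _again = lock.read().unwrap(); // fine: the reads never overlap
}
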
lightning/src/ln/functional_test_utils.rs

Lines changed: 4 additions & 3 deletions

@@ -2150,9 +2150,10 @@ pub fn route_over_limit<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_rou
 	assert!(err.contains("Cannot send value that would put us over the max HTLC value in flight our peer will accept")));
 }
 
-pub fn send_payment<'a, 'b, 'c>(origin: &Node<'a, 'b, 'c>, expected_route: &[&Node<'a, 'b, 'c>], recv_value: u64) {
-	let our_payment_preimage = route_payment(&origin, expected_route, recv_value).0;
-	claim_payment(&origin, expected_route, our_payment_preimage);
+pub fn send_payment<'a, 'b, 'c>(origin: &Node<'a, 'b, 'c>, expected_route: &[&Node<'a, 'b, 'c>], recv_value: u64) -> (PaymentPreimage, PaymentHash, PaymentSecret) {
+	let res = route_payment(&origin, expected_route, recv_value);
+	claim_payment(&origin, expected_route, res.0);
+	res
 }
 
 pub fn fail_payment_along_route<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_paths: &[&[&Node<'a, 'b, 'c>]], skip_last: bool, our_payment_hash: PaymentHash) {
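
`send_payment` now hands back the full payment tuple, so callers no longer need a separate `route_payment` just to learn the hash. A usage sketch (hypothetical call site; the payment test below grabs just the hash via `.1`):

let (payment_preimage, payment_hash, payment_secret) =
    send_payment(&nodes[0], &[&nodes[1]], 100_000);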

lightning/src/ln/payment_tests.rs

Lines changed: 27 additions & 26 deletions

@@ -1194,33 +1194,31 @@ fn test_trivial_inflight_htlc_tracking(){
 	let (_, _, chan_2_id, _) = create_announced_chan_between_nodes(&nodes, 1, 2);
 
 	// Send and claim the payment. Inflight HTLCs should be empty.
-	let (route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash!(nodes[0], nodes[2], 500000);
-	nodes[0].node.send_payment(&route, payment_hash, &Some(payment_secret), PaymentId(payment_hash.0)).unwrap();
-	check_added_monitors!(nodes[0], 1);
-	pass_along_route(&nodes[0], &[&vec!(&nodes[1], &nodes[2])[..]], 500000, payment_hash, payment_secret);
-	claim_payment(&nodes[0], &vec!(&nodes[1], &nodes[2])[..], payment_preimage);
+	let payment_hash = send_payment(&nodes[0], &[&nodes[1], &nodes[2]], 500000).1;
+	let inflight_htlcs = node_chanmgrs[0].compute_inflight_htlcs();
 	{
-		let inflight_htlcs = node_chanmgrs[0].compute_inflight_htlcs();
-
 		let mut node_0_per_peer_lock;
 		let mut node_0_peer_state_lock;
-		let mut node_1_per_peer_lock;
-		let mut node_1_peer_state_lock;
 		let channel_1 = get_channel_ref!(&nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, chan_1_id);
-		let channel_2 = get_channel_ref!(&nodes[1], nodes[2], node_1_per_peer_lock, node_1_peer_state_lock, chan_2_id);
 
 		let chan_1_used_liquidity = inflight_htlcs.used_liquidity_msat(
 			&NodeId::from_pubkey(&nodes[0].node.get_our_node_id()) ,
 			&NodeId::from_pubkey(&nodes[1].node.get_our_node_id()),
 			channel_1.get_short_channel_id().unwrap()
 		);
+		assert_eq!(chan_1_used_liquidity, None);
+	}
+	{
+		let mut node_1_per_peer_lock;
+		let mut node_1_peer_state_lock;
+		let channel_2 = get_channel_ref!(&nodes[1], nodes[2], node_1_per_peer_lock, node_1_peer_state_lock, chan_2_id);
+
 		let chan_2_used_liquidity = inflight_htlcs.used_liquidity_msat(
 			&NodeId::from_pubkey(&nodes[1].node.get_our_node_id()) ,
 			&NodeId::from_pubkey(&nodes[2].node.get_our_node_id()),
 			channel_2.get_short_channel_id().unwrap()
 		);
 
-		assert_eq!(chan_1_used_liquidity, None);
 		assert_eq!(chan_2_used_liquidity, None);
 	}
 	let pending_payments = nodes[0].node.list_recent_payments();
@@ -1233,30 +1231,32 @@ fn test_trivial_inflight_htlc_tracking(){
 	}
 
 	// Send the payment, but do not claim it. Our inflight HTLCs should contain the pending payment.
-	let (payment_preimage, payment_hash, _) = route_payment(&nodes[0], &vec!(&nodes[1], &nodes[2])[..], 500000);
+	let (payment_preimage, payment_hash, _) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 500000);
+	let inflight_htlcs = node_chanmgrs[0].compute_inflight_htlcs();
 	{
-		let inflight_htlcs = node_chanmgrs[0].compute_inflight_htlcs();
-
 		let mut node_0_per_peer_lock;
 		let mut node_0_peer_state_lock;
-		let mut node_1_per_peer_lock;
-		let mut node_1_peer_state_lock;
 		let channel_1 = get_channel_ref!(&nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, chan_1_id);
-		let channel_2 = get_channel_ref!(&nodes[1], nodes[2], node_1_per_peer_lock, node_1_peer_state_lock, chan_2_id);
 
 		let chan_1_used_liquidity = inflight_htlcs.used_liquidity_msat(
 			&NodeId::from_pubkey(&nodes[0].node.get_our_node_id()) ,
 			&NodeId::from_pubkey(&nodes[1].node.get_our_node_id()),
 			channel_1.get_short_channel_id().unwrap()
 		);
+		// First hop accounts for expected 1000 msat fee
+		assert_eq!(chan_1_used_liquidity, Some(501000));
+	}
+	{
+		let mut node_1_per_peer_lock;
+		let mut node_1_peer_state_lock;
+		let channel_2 = get_channel_ref!(&nodes[1], nodes[2], node_1_per_peer_lock, node_1_peer_state_lock, chan_2_id);
+
 		let chan_2_used_liquidity = inflight_htlcs.used_liquidity_msat(
 			&NodeId::from_pubkey(&nodes[1].node.get_our_node_id()) ,
 			&NodeId::from_pubkey(&nodes[2].node.get_our_node_id()),
 			channel_2.get_short_channel_id().unwrap()
 		);
 
-		// First hop accounts for expected 1000 msat fee
-		assert_eq!(chan_1_used_liquidity, Some(501000));
 		assert_eq!(chan_2_used_liquidity, Some(500000));
 	}
 	let pending_payments = nodes[0].node.list_recent_payments();
@@ -1271,28 +1271,29 @@ fn test_trivial_inflight_htlc_tracking(){
 		nodes[0].node.timer_tick_occurred();
 	}
 
+	let inflight_htlcs = node_chanmgrs[0].compute_inflight_htlcs();
 	{
-		let inflight_htlcs = node_chanmgrs[0].compute_inflight_htlcs();
-
 		let mut node_0_per_peer_lock;
 		let mut node_0_peer_state_lock;
-		let mut node_1_per_peer_lock;
-		let mut node_1_peer_state_lock;
 		let channel_1 = get_channel_ref!(&nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, chan_1_id);
-		let channel_2 = get_channel_ref!(&nodes[1], nodes[2], node_1_per_peer_lock, node_1_peer_state_lock, chan_2_id);
 
 		let chan_1_used_liquidity = inflight_htlcs.used_liquidity_msat(
 			&NodeId::from_pubkey(&nodes[0].node.get_our_node_id()) ,
 			&NodeId::from_pubkey(&nodes[1].node.get_our_node_id()),
 			channel_1.get_short_channel_id().unwrap()
 		);
+		assert_eq!(chan_1_used_liquidity, None);
+	}
+	{
+		let mut node_1_per_peer_lock;
+		let mut node_1_peer_state_lock;
+		let channel_2 = get_channel_ref!(&nodes[1], nodes[2], node_1_per_peer_lock, node_1_peer_state_lock, chan_2_id);
+
 		let chan_2_used_liquidity = inflight_htlcs.used_liquidity_msat(
 			&NodeId::from_pubkey(&nodes[1].node.get_our_node_id()) ,
 			&NodeId::from_pubkey(&nodes[2].node.get_our_node_id()),
 			channel_2.get_short_channel_id().unwrap()
 		);
-
-		assert_eq!(chan_1_used_liquidity, None);
 		assert_eq!(chan_2_used_liquidity, None);
 	}
 
lightning/src/routing/utxo.rs

Lines changed: 3 additions & 2 deletions

@@ -747,10 +747,11 @@ mod tests {
 			Ok(TxOut { value: 1_000_000, script_pubkey: good_script }));
 
 		assert_eq!(chan_update_a.contents.timestamp, chan_update_b.contents.timestamp);
-		assert!(network_graph.read_only().channels()
+		let graph_lock = network_graph.read_only();
+		assert!(graph_lock.channels()
 			.get(&valid_announcement.contents.short_channel_id).as_ref().unwrap()
 			.one_to_two.as_ref().unwrap().last_update !=
-			network_graph.read_only().channels()
+			graph_lock.channels()
 			.get(&valid_announcement.contents.short_channel_id).as_ref().unwrap()
 			.two_to_one.as_ref().unwrap().last_update);
 	}
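
This hunk shows the second fix pattern: bind the read guard once and reuse it, instead of re-locking within a single expression where both temporary guards would be alive at the same time. A standalone sketch (hypothetical data, not from the diff):

fn one_guard(pair: &std::sync::RwLock<(u32, u32)>) {
    // Before (two overlapping read guards in one expression; now refused):
    // assert!(pair.read().unwrap().0 != pair.read().unwrap().1);
    // After: bind the guard once and use it twice.
    let guard = pair.read().unwrap();
    assert!(guard.0 != guard.1);
}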

lightning/src/sync/debug_sync.rs

Lines changed: 7 additions & 24 deletions

@@ -124,21 +124,13 @@ impl LockMetadata {
 		res
 	}
 
-	// Returns whether we were a recursive lock (only relevant for read)
-	fn _pre_lock(this: &Arc<LockMetadata>, read: bool) -> bool {
-		let mut inserted = false;
+	fn pre_lock(this: &Arc<LockMetadata>) {
 		LOCKS_HELD.with(|held| {
 			// For each lock which is currently locked, check that no lock's locked-before
 			// set includes the lock we're about to lock, which would imply a lockorder
 			// inversion.
-			for (locked_idx, _locked) in held.borrow().iter() {
-				if read && *locked_idx == this.lock_idx {
-					// Recursive read locks are explicitly allowed
-					return;
-				}
-			}
 			for (locked_idx, locked) in held.borrow().iter() {
-				if !read && *locked_idx == this.lock_idx {
+				if *locked_idx == this.lock_idx {
 					// With `feature = "backtrace"` set, we may be looking at different instances
 					// of the same lock.
 					debug_assert!(cfg!(feature = "backtrace"), "Tried to acquire a lock while it was held!");
@@ -162,14 +154,9 @@ impl LockMetadata {
 				}
 			}
 			held.borrow_mut().insert(this.lock_idx, Arc::clone(this));
-			inserted = true;
 		});
-		inserted
 	}
 
-	fn pre_lock(this: &Arc<LockMetadata>) { Self::_pre_lock(this, false); }
-	fn pre_read_lock(this: &Arc<LockMetadata>) -> bool { Self::_pre_lock(this, true) }
-
 	fn held_by_thread(this: &Arc<LockMetadata>) -> LockHeldState {
 		let mut res = LockHeldState::NotHeldByThread;
 		LOCKS_HELD.with(|held| {
@@ -276,7 +263,6 @@ pub struct RwLock<T: Sized> {
 
 pub struct RwLockReadGuard<'a, T: Sized + 'a> {
 	lock: &'a RwLock<T>,
-	first_lock: bool,
 	guard: StdRwLockReadGuard<'a, T>,
 }
 
@@ -295,12 +281,6 @@ impl<T: Sized> Deref for RwLockReadGuard<'_, T> {
 
 impl<T: Sized> Drop for RwLockReadGuard<'_, T> {
 	fn drop(&mut self) {
-		if !self.first_lock {
-			// Note that its not strictly true that the first taken read lock will get unlocked
-			// last, but in practice our locks are always taken as RAII, so it should basically
-			// always be true.
-			return;
-		}
 		LOCKS_HELD.with(|held| {
 			held.borrow_mut().remove(&self.lock.deps.lock_idx);
 		});
@@ -335,8 +315,11 @@ impl<T> RwLock<T> {
 	}
 
 	pub fn read<'a>(&'a self) -> LockResult<RwLockReadGuard<'a, T>> {
-		let first_lock = LockMetadata::pre_read_lock(&self.deps);
-		self.inner.read().map(|guard| RwLockReadGuard { lock: self, guard, first_lock }).map_err(|_| ())
+		// Note that while we could be taking a recursive read lock here, Rust's `RwLock` may
+		// deadlock trying to take a second read lock if another thread is waiting on the write
+		// lock. Its platform dependent (but our in-tree `FairRwLock` guarantees this behavior).
+		LockMetadata::pre_lock(&self.deps);
+		self.inner.read().map(|guard| RwLockReadGuard { lock: self, guard }).map_err(|_| ())
 	}
 
 	pub fn write<'a>(&'a self) -> LockResult<RwLockWriteGuard<'a, T>> {

lightning/src/sync/test_lockorder_checks.rs

Lines changed: 2 additions & 17 deletions

@@ -15,6 +15,8 @@ fn recursive_lock_fail() {
 }
 
 #[test]
+#[should_panic]
+#[cfg(not(feature = "backtrace"))]
 fn recursive_read() {
 	let lock = RwLock::new(());
 	let _a = lock.read().unwrap();
@@ -66,23 +68,6 @@ fn read_lockorder_fail() {
 	}
 }
 
-#[test]
-fn read_recursive_no_lockorder() {
-	// Like the above, but note that no lockorder is implied when we recursively read-lock a
-	// RwLock, causing this to pass just fine.
-	let a = RwLock::new(());
-	let b = RwLock::new(());
-	let _outer = a.read().unwrap();
-	{
-		let _a = a.read().unwrap();
-		let _b = b.read().unwrap();
-	}
-	{
-		let _b = b.read().unwrap();
-		let _a = a.read().unwrap();
-	}
-}
-
 #[test]
 #[should_panic]
 fn read_write_lockorder_fail() {
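
Note that the deleted `read_recursive_no_lockorder` test is not merely gated off: under the new rules its very first recursive `a.read()` trips the checker before any lockorder question even arises, which is exactly what the now-`should_panic` `recursive_read` test above exercises (sketch of the failing pattern; it panics in builds without the `backtrace` feature, where the same-lock check panics rather than debug-asserting):

let a = RwLock::new(());
let _outer = a.read().unwrap();
let _again = a.read().unwrap(); // now panics: recursive read refused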
