
Commit 46428a7

Refuse recursive read locks in lockorder testing
Our existing lockorder tests assume that taking a read lock on a thread which already holds the same read lock is totally fine. This isn't at all true. The `std` `RwLock` behavior is platform-dependent: on most platforms readers can starve writers, as readers never block for a pending writer. However, on platforms where this is not the case, one thread trying to take a write lock may deadlock with another thread that already holds a read lock and is attempting to take it again. Worse, our in-tree `FairRwLock` exhibits this behavior explicitly on all platforms, precisely to avoid the starvation issue. Thus, we shouldn't have any special handling allowing recursive read locks, so we simply remove it here.
1 parent cd0bfdb commit 46428a7
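To make the failure mode concrete, here is a minimal sketch (not part of the commit) of the interleaving described above, using `std::sync::RwLock`. Whether it actually hangs depends on the platform's reader/writer preference; a writer-preferring lock such as the in-tree `FairRwLock` deadlocks deterministically in this pattern:

    use std::sync::{Arc, RwLock};
    use std::thread;
    use std::time::Duration;

    fn main() {
        let lock = Arc::new(RwLock::new(()));

        let reader = {
            let lock = Arc::clone(&lock);
            thread::spawn(move || {
                let _r1 = lock.read().unwrap();
                // Give the writer thread time to queue up on the write lock.
                thread::sleep(Duration::from_millis(100));
                // On a writer-preferring RwLock this recursive read blocks
                // behind the pending writer, while the writer blocks behind
                // _r1: a deadlock.
                let _r2 = lock.read().unwrap();
            })
        };

        let writer = {
            let lock = Arc::clone(&lock);
            thread::spawn(move || {
                thread::sleep(Duration::from_millis(50));
                let _w = lock.write().unwrap(); // waits for _r1 to drop
            })
        };

        reader.join().unwrap();
        writer.join().unwrap();
    }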

File tree: 4 files changed, +37 -62 lines changed


lightning/src/ln/chanmon_update_fail_tests.rs (+3 -1)
@@ -1426,9 +1426,11 @@ fn monitor_failed_no_reestablish_response() {
     {
         let mut node_0_per_peer_lock;
         let mut node_0_peer_state_lock;
+        get_channel_ref!(nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, channel_id).announcement_sigs_state = AnnouncementSigsState::PeerReceived;
+    }
+    {
         let mut node_1_per_peer_lock;
         let mut node_1_peer_state_lock;
-        get_channel_ref!(nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, channel_id).announcement_sigs_state = AnnouncementSigsState::PeerReceived;
         get_channel_ref!(nodes[1], nodes[0], node_1_per_peer_lock, node_1_peer_state_lock, channel_id).announcement_sigs_state = AnnouncementSigsState::PeerReceived;
     }
 
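The shape of the fix, here and in `payment_tests.rs` below, is the same: a scope that held two per-peer read locks at once is split into two sequential scopes, so at most one such lock is held at any point. This matters because, as the `debug_sync.rs` comment below notes, lock instances created at the same construction site can share an identity under `feature = "backtrace"`, making the second acquisition look recursive. A distilled, runnable sketch, with hypothetical `state_a`/`state_b` locks standing in for the per-peer state:

    use std::sync::RwLock;

    fn main() {
        let state_a = RwLock::new(0u32);
        let state_b = RwLock::new(0u32);

        // Before: both read locks were held simultaneously in one scope.
        //
        // {
        //     let a = state_a.read().unwrap();
        //     let b = state_b.read().unwrap();
        // }

        // After: each guard is confined to its own scope, so it is dropped
        // before the next lock is taken.
        {
            let a = state_a.read().unwrap();
            assert_eq!(*a, 0);
        }
        {
            let b = state_b.read().unwrap();
            assert_eq!(*b, 0);
        }
    }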
lightning/src/ln/payment_tests.rs (+25 -20)
@@ -1275,83 +1275,88 @@ fn test_trivial_inflight_htlc_tracking(){
 
     // Send and claim the payment. Inflight HTLCs should be empty.
     send_payment(&nodes[0], &vec!(&nodes[1], &nodes[2])[..], 500000);
+    let inflight_htlcs = node_chanmgrs[0].compute_inflight_htlcs();
     {
-        let inflight_htlcs = node_chanmgrs[0].compute_inflight_htlcs();
-
         let mut node_0_per_peer_lock;
         let mut node_0_peer_state_lock;
-        let mut node_1_per_peer_lock;
-        let mut node_1_peer_state_lock;
         let channel_1 = get_channel_ref!(&nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, chan_1_id);
-        let channel_2 = get_channel_ref!(&nodes[1], nodes[2], node_1_per_peer_lock, node_1_peer_state_lock, chan_2_id);
 
         let chan_1_used_liquidity = inflight_htlcs.used_liquidity_msat(
             &NodeId::from_pubkey(&nodes[0].node.get_our_node_id()) ,
             &NodeId::from_pubkey(&nodes[1].node.get_our_node_id()),
             channel_1.get_short_channel_id().unwrap()
         );
+        assert_eq!(chan_1_used_liquidity, None);
+    }
+    {
+        let mut node_1_per_peer_lock;
+        let mut node_1_peer_state_lock;
+        let channel_2 = get_channel_ref!(&nodes[1], nodes[2], node_1_per_peer_lock, node_1_peer_state_lock, chan_2_id);
+
         let chan_2_used_liquidity = inflight_htlcs.used_liquidity_msat(
             &NodeId::from_pubkey(&nodes[1].node.get_our_node_id()) ,
             &NodeId::from_pubkey(&nodes[2].node.get_our_node_id()),
             channel_2.get_short_channel_id().unwrap()
         );
 
-        assert_eq!(chan_1_used_liquidity, None);
         assert_eq!(chan_2_used_liquidity, None);
     }
 
     // Send the payment, but do not claim it. Our inflight HTLCs should contain the pending payment.
     let (payment_preimage, _, _) = route_payment(&nodes[0], &vec!(&nodes[1], &nodes[2])[..], 500000);
+    let inflight_htlcs = node_chanmgrs[0].compute_inflight_htlcs();
     {
-        let inflight_htlcs = node_chanmgrs[0].compute_inflight_htlcs();
-
         let mut node_0_per_peer_lock;
         let mut node_0_peer_state_lock;
-        let mut node_1_per_peer_lock;
-        let mut node_1_peer_state_lock;
         let channel_1 = get_channel_ref!(&nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, chan_1_id);
-        let channel_2 = get_channel_ref!(&nodes[1], nodes[2], node_1_per_peer_lock, node_1_peer_state_lock, chan_2_id);
 
         let chan_1_used_liquidity = inflight_htlcs.used_liquidity_msat(
             &NodeId::from_pubkey(&nodes[0].node.get_our_node_id()) ,
             &NodeId::from_pubkey(&nodes[1].node.get_our_node_id()),
             channel_1.get_short_channel_id().unwrap()
         );
+        // First hop accounts for expected 1000 msat fee
+        assert_eq!(chan_1_used_liquidity, Some(501000));
+    }
+    {
+        let mut node_1_per_peer_lock;
+        let mut node_1_peer_state_lock;
+        let channel_2 = get_channel_ref!(&nodes[1], nodes[2], node_1_per_peer_lock, node_1_peer_state_lock, chan_2_id);
+
         let chan_2_used_liquidity = inflight_htlcs.used_liquidity_msat(
             &NodeId::from_pubkey(&nodes[1].node.get_our_node_id()) ,
             &NodeId::from_pubkey(&nodes[2].node.get_our_node_id()),
             channel_2.get_short_channel_id().unwrap()
        );
 
-        // First hop accounts for expected 1000 msat fee
-        assert_eq!(chan_1_used_liquidity, Some(501000));
         assert_eq!(chan_2_used_liquidity, Some(500000));
     }
 
     // Now, let's claim the payment. This should result in the used liquidity to return `None`.
     claim_payment(&nodes[0], &[&nodes[1], &nodes[2]], payment_preimage);
+    let inflight_htlcs = node_chanmgrs[0].compute_inflight_htlcs();
     {
-        let inflight_htlcs = node_chanmgrs[0].compute_inflight_htlcs();
-
         let mut node_0_per_peer_lock;
         let mut node_0_peer_state_lock;
-        let mut node_1_per_peer_lock;
-        let mut node_1_peer_state_lock;
         let channel_1 = get_channel_ref!(&nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, chan_1_id);
-        let channel_2 = get_channel_ref!(&nodes[1], nodes[2], node_1_per_peer_lock, node_1_peer_state_lock, chan_2_id);
 
         let chan_1_used_liquidity = inflight_htlcs.used_liquidity_msat(
             &NodeId::from_pubkey(&nodes[0].node.get_our_node_id()) ,
             &NodeId::from_pubkey(&nodes[1].node.get_our_node_id()),
             channel_1.get_short_channel_id().unwrap()
         );
+        assert_eq!(chan_1_used_liquidity, None);
+    }
+    {
+        let mut node_1_per_peer_lock;
+        let mut node_1_peer_state_lock;
+        let channel_2 = get_channel_ref!(&nodes[1], nodes[2], node_1_per_peer_lock, node_1_peer_state_lock, chan_2_id);
+
         let chan_2_used_liquidity = inflight_htlcs.used_liquidity_msat(
             &NodeId::from_pubkey(&nodes[1].node.get_our_node_id()) ,
             &NodeId::from_pubkey(&nodes[2].node.get_our_node_id()),
             channel_2.get_short_channel_id().unwrap()
         );
-
-        assert_eq!(chan_1_used_liquidity, None);
         assert_eq!(chan_2_used_liquidity, None);
     }
 }
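Note that the `compute_inflight_htlcs()` calls also move out of the braced scopes: once each scope is limited to a single node's locks, the shared `inflight_htlcs` binding must be created before both scopes so that each can use it.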

lightning/src/sync/debug_sync.rs (+7 -24)
@@ -122,21 +122,13 @@ impl LockMetadata {
         res
     }
 
-    // Returns whether we were a recursive lock (only relevant for read)
-    fn _pre_lock(this: &Arc<LockMetadata>, read: bool) -> bool {
-        let mut inserted = false;
+    fn pre_lock(this: &Arc<LockMetadata>) {
         LOCKS_HELD.with(|held| {
             // For each lock which is currently locked, check that no lock's locked-before
             // set includes the lock we're about to lock, which would imply a lockorder
             // inversion.
-            for (locked_idx, _locked) in held.borrow().iter() {
-                if read && *locked_idx == this.lock_idx {
-                    // Recursive read locks are explicitly allowed
-                    return;
-                }
-            }
             for (locked_idx, locked) in held.borrow().iter() {
-                if !read && *locked_idx == this.lock_idx {
+                if *locked_idx == this.lock_idx {
                     // With `feature = "backtrace"` set, we may be looking at different instances
                     // of the same lock.
                     debug_assert!(cfg!(feature = "backtrace"), "Tried to acquire a lock while it was held!");
@@ -160,14 +152,9 @@ impl LockMetadata {
                 }
             }
             held.borrow_mut().insert(this.lock_idx, Arc::clone(this));
-            inserted = true;
         });
-        inserted
     }
 
-    fn pre_lock(this: &Arc<LockMetadata>) { Self::_pre_lock(this, false); }
-    fn pre_read_lock(this: &Arc<LockMetadata>) -> bool { Self::_pre_lock(this, true) }
-
     fn try_locked(this: &Arc<LockMetadata>) {
         LOCKS_HELD.with(|held| {
             // Since a try-lock will simply fail if the lock is held already, we do not
@@ -255,7 +242,6 @@ pub struct RwLock<T: Sized> {
 
 pub struct RwLockReadGuard<'a, T: Sized + 'a> {
     lock: &'a RwLock<T>,
-    first_lock: bool,
     guard: StdRwLockReadGuard<'a, T>,
 }
 
@@ -274,12 +260,6 @@ impl<T: Sized> Deref for RwLockReadGuard<'_, T> {
 
 impl<T: Sized> Drop for RwLockReadGuard<'_, T> {
     fn drop(&mut self) {
-        if !self.first_lock {
-            // Note that its not strictly true that the first taken read lock will get unlocked
-            // last, but in practice our locks are always taken as RAII, so it should basically
-            // always be true.
-            return;
-        }
         LOCKS_HELD.with(|held| {
             held.borrow_mut().remove(&self.lock.deps.lock_idx);
         });
@@ -314,8 +294,11 @@ impl<T> RwLock<T> {
     }
 
     pub fn read<'a>(&'a self) -> LockResult<RwLockReadGuard<'a, T>> {
-        let first_lock = LockMetadata::pre_read_lock(&self.deps);
-        self.inner.read().map(|guard| RwLockReadGuard { lock: self, guard, first_lock }).map_err(|_| ())
+        // Note that while we could be taking a recursive read lock here, Rust's `RwLock` may
+        // deadlock trying to take a second read lock if another thread is waiting on the write
+        // lock. Its platform dependent (but our in-tree `FairRwLock` guarantees this behavior).
+        LockMetadata::pre_lock(&self.deps);
+        self.inner.read().map(|guard| RwLockReadGuard { lock: self, guard }).map_err(|_| ())
     }
 
     pub fn write<'a>(&'a self) -> LockResult<RwLockWriteGuard<'a, T>> {
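For context on what `pre_lock` does: the checker keeps a thread-local map (`LOCKS_HELD`) of the locks the current thread holds and consults it before every acquisition; after this commit, any re-acquisition of an already-held lock is refused, read or write alike. A rough, simplified sketch of that idea (not the actual `LockMetadata` implementation, which also tracks lockorder dependencies):

    use std::cell::RefCell;
    use std::collections::HashSet;

    thread_local! {
        // Simplified stand-in for LOCKS_HELD: identities of locks held
        // by the current thread.
        static HELD: RefCell<HashSet<u64>> = RefCell::new(HashSet::new());
    }

    // Called before taking any lock, read or write. Previously a `read`
    // flag let recursive read locks through; now both paths share this
    // single check.
    fn pre_lock(lock_idx: u64) {
        HELD.with(|held| {
            // HashSet::insert returns false if the value was already present.
            assert!(
                held.borrow_mut().insert(lock_idx),
                "Tried to acquire a lock while it was held!"
            );
        });
    }

    // Called when a lock guard is dropped.
    fn post_unlock(lock_idx: u64) {
        HELD.with(|held| {
            held.borrow_mut().remove(&lock_idx);
        });
    }

    fn main() {
        pre_lock(1);
        post_unlock(1);
        pre_lock(1); // fine: the earlier hold was released
        // pre_lock(1); // would panic: recursive acquisition of a held lock
    }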

lightning/src/sync/test_lockorder_checks.rs (+2 -17)
@@ -10,6 +10,8 @@ fn recursive_lock_fail() {
 }
 
 #[test]
+#[should_panic]
+#[cfg(not(feature = "backtrace"))]
 fn recursive_read() {
     let lock = RwLock::new(());
     let _a = lock.read().unwrap();
@@ -61,23 +63,6 @@ fn read_lockorder_fail() {
     }
 }
 
-#[test]
-fn read_recursive_no_lockorder() {
-    // Like the above, but note that no lockorder is implied when we recursively read-lock a
-    // RwLock, causing this to pass just fine.
-    let a = RwLock::new(());
-    let b = RwLock::new(());
-    let _outer = a.read().unwrap();
-    {
-        let _a = a.read().unwrap();
-        let _b = b.read().unwrap();
-    }
-    {
-        let _b = b.read().unwrap();
-        let _a = a.read().unwrap();
-    }
-}
-
 #[test]
 #[should_panic]
 fn read_write_lockorder_fail() {
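The two new attributes on `recursive_read` encode the behavior change: `#[should_panic]` because a recursive read lock is now refused outright, and `#[cfg(not(feature = "backtrace"))]` because, per the `debug_assert!` in `debug_sync.rs` above, re-acquiring the same `lock_idx` is tolerated when `backtrace` is enabled (two lock instances constructed at the same site can share an identity), so the panic is only guaranteed without that feature.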
