
Commit 9880ae2

Disallow taking two instances of the same mutex at the same time
Taking two instances of the same mutex may be totally fine, but it requires a total lockorder that we cannot (trivially) check. Thus, it's generally unsafe to do if we can avoid it.

To discourage this, here we default to panicking on such locks in our lockorder tests, with a separate lock function added that is clearly labeled "unsafe" to allow doing so when we can guarantee a total lockorder.

This requires adapting a number of call sites to the new API, including fixing a bug this turned up in `ChannelMonitor`'s `PartialEq`, where no lockorder was guaranteed.
1 parent 4894f1a commit 9880ae2
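
In practice, the rule this commit enforces looks like the sketch below (crate-internal pseudocode, not code from this commit; `compare_inners` is a hypothetical helper). Where code previously called `lock()` on two instances of the same mutex, it must now pick an explicit total order, here by memory address as the `ChannelMonitor::eq` fix below does, and use the deliberately verbose method so the lockorder tests know the double-lock is intentional.

use crate::sync::{Mutex, LockTestExt};

// Hypothetical helper showing the new pattern. Plain `lock()` on the second
// instance would now fail the lockorder tests; the verbosely named method is
// the explicit opt-in, and the caller supplies the total order (by address).
fn compare_inners(a: &Mutex<u64>, b: &Mutex<u64>) -> bool {
    let a_first = (a as *const _ as usize) < (b as *const _ as usize);
    let (first, second) = if a_first { (a, b) } else { (b, a) };
    let g1 = first.unsafe_well_ordered_double_lock_self();
    let g2 = second.unsafe_well_ordered_double_lock_self();
    *g1 == *g2
}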

10 files changed: +113 -52 lines

lightning/src/chain/channelmonitor.rs

Lines changed: 8 additions & 4 deletions

@@ -60,7 +60,7 @@ use core::{cmp, mem};
 use crate::io::{self, Error};
 use core::convert::TryInto;
 use core::ops::Deref;
-use crate::sync::Mutex;
+use crate::sync::{Mutex, LockTestExt};
 
 /// An update generated by the underlying channel itself which contains some new information the
 /// [`ChannelMonitor`] should be made aware of.
@@ -855,9 +855,13 @@ pub type TransactionOutputs = (Txid, Vec<(u32, TxOut)>);
 
 impl<Signer: WriteableEcdsaChannelSigner> PartialEq for ChannelMonitor<Signer> where Signer: PartialEq {
     fn eq(&self, other: &Self) -> bool {
-        let inner = self.inner.lock().unwrap();
-        let other = other.inner.lock().unwrap();
-        inner.eq(&other)
+        // We need some kind of total lockorder. Absent a better idea, we sort by position in
+        // memory and take locks in that order (assuming that we can't move within memory while a
+        // lock is held).
+        let ord = ((self as *const _) as usize) < ((other as *const _) as usize);
+        let a = if ord { self.inner.unsafe_well_ordered_double_lock_self() } else { other.inner.unsafe_well_ordered_double_lock_self() };
+        let b = if ord { other.inner.unsafe_well_ordered_double_lock_self() } else { self.inner.unsafe_well_ordered_double_lock_self() };
+        a.eq(&b)
     }
 }
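
The ordering trick used in `eq` above is the standard deadlock-avoidance pattern of acquiring locks in address order: every thread agrees on which address is lower, so two threads comparing the same pair in opposite argument order still take the locks in the same sequence. A self-contained illustration with plain `std` types (hypothetical, not LDK code):

use std::sync::{Arc, Mutex};
use std::thread;

// Always take the mutex at the lower address first; both threads below agree
// on that order even though they pass the arguments swapped.
fn locked_sum(x: &Mutex<u64>, y: &Mutex<u64>) -> u64 {
    let x_first = (x as *const _ as usize) < (y as *const _ as usize);
    let (first, second) = if x_first { (x, y) } else { (y, x) };
    let g1 = first.lock().unwrap();
    let g2 = second.lock().unwrap();
    *g1 + *g2
}

fn main() {
    let a = Arc::new(Mutex::new(1u64));
    let b = Arc::new(Mutex::new(2u64));
    let (a2, b2) = (Arc::clone(&a), Arc::clone(&b));
    let t = thread::spawn(move || locked_sum(&a2, &b2));
    // Opposite argument order on this thread; address ordering keeps it deadlock-free.
    assert_eq!(locked_sum(&b, &a), 3);
    assert_eq!(t.join().unwrap(), 3);
}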

lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 7 additions & 6 deletions

@@ -108,12 +108,13 @@ fn test_monitor_and_persister_update_fail() {
         blocks: Arc::new(Mutex::new(vec![(genesis_block(Network::Testnet), 200); 200])),
     };
     let chain_mon = {
-        let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
-        let mut w = test_utils::TestVecWriter(Vec::new());
-        monitor.write(&mut w).unwrap();
-        let new_monitor = <(BlockHash, ChannelMonitor<EnforcingSigner>)>::read(
-            &mut io::Cursor::new(&w.0), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
-        assert!(new_monitor == *monitor);
+        let new_monitor = {
+            let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
+            let new_monitor = <(BlockHash, ChannelMonitor<EnforcingSigner>)>::read(
+                &mut io::Cursor::new(&monitor.encode()), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
+            assert!(new_monitor == *monitor);
+            new_monitor
+        };
         let chain_mon = test_utils::TestChainMonitor::new(Some(&chain_source), &tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
         assert_eq!(chain_mon.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
         chain_mon

lightning/src/ln/channelmanager.rs

Lines changed: 10 additions & 3 deletions

@@ -6941,7 +6941,10 @@ where
         let mut monitor_update_blocked_actions_per_peer = None;
         let mut peer_states = Vec::new();
         for (_, peer_state_mutex) in per_peer_state.iter() {
-            peer_states.push(peer_state_mutex.lock().unwrap());
+            // Because we're holding the owning `per_peer_state` write lock here there's no chance
+            // of a lockorder violation deadlock - no other thread can be holding any
+            // per_peer_state lock at all.
+            peer_states.push(peer_state_mutex.unsafe_well_ordered_double_lock_self());
         }
 
         (serializable_peer_count).write(writer)?;
@@ -8286,18 +8289,20 @@ mod tests {
             let nodes_0_lock = nodes[0].node.id_to_peer.lock().unwrap();
             assert_eq!(nodes_0_lock.len(), 1);
             assert!(nodes_0_lock.contains_key(channel_id));
-
-            assert_eq!(nodes[1].node.id_to_peer.lock().unwrap().len(), 0);
         }
 
+        assert_eq!(nodes[1].node.id_to_peer.lock().unwrap().len(), 0);
+
         let funding_created_msg = get_event_msg!(nodes[0], MessageSendEvent::SendFundingCreated, nodes[1].node.get_our_node_id());
 
         nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &funding_created_msg);
         {
             let nodes_0_lock = nodes[0].node.id_to_peer.lock().unwrap();
             assert_eq!(nodes_0_lock.len(), 1);
             assert!(nodes_0_lock.contains_key(channel_id));
+        }
 
+        {
             // Assert that `nodes[1]`'s `id_to_peer` map is populated with the channel as soon as
             // as it has the funding transaction.
             let nodes_1_lock = nodes[1].node.id_to_peer.lock().unwrap();
@@ -8327,7 +8332,9 @@ mod tests {
             let nodes_0_lock = nodes[0].node.id_to_peer.lock().unwrap();
             assert_eq!(nodes_0_lock.len(), 1);
             assert!(nodes_0_lock.contains_key(channel_id));
+        }
 
+        {
             // At this stage, `nodes[1]` has proposed a fee for the closing transaction in the
             // `handle_closing_signed` call above. As `nodes[1]` has not yet received the signature
             // from `nodes[0]` for the closing transaction with the proposed fee, the channel is
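
For context on the first hunk above (a simplified stand-in with `std` types, names hypothetical): `per_peer_state` is an outer lock around a map of per-peer mutexes. Once the outer write lock is held, no other thread can hold any of the inner per-peer locks, so locking all of them during serialization cannot deadlock regardless of iteration order.

use std::collections::HashMap;
use std::sync::{Mutex, RwLock};

// Simplified stand-in for the per-peer state structure (names hypothetical).
struct PeerState { serializable: u32 }

fn snapshot_all(per_peer_state: &RwLock<HashMap<u64, Mutex<PeerState>>>) -> Vec<u32> {
    // Holding the owning write lock excludes every other locker of the inner
    // mutexes, so taking them all here, in any order, cannot deadlock.
    let peers = per_peer_state.write().unwrap();
    peers.values()
        .map(|peer_mutex| peer_mutex.lock().unwrap().serializable)
        .collect()
}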

lightning/src/ln/functional_test_utils.rs

Lines changed: 3 additions & 3 deletions

@@ -44,7 +44,7 @@ use crate::io;
 use crate::prelude::*;
 use core::cell::RefCell;
 use alloc::rc::Rc;
-use crate::sync::{Arc, Mutex};
+use crate::sync::{Arc, Mutex, LockTestExt};
 use core::mem;
 use core::iter::repeat;
 use bitcoin::{PackedLockTime, TxMerkleNode};
@@ -466,8 +466,8 @@ impl<'a, 'b, 'c> Drop for Node<'a, 'b, 'c> {
                     panic!();
                 }
             }
-            assert_eq!(*chain_source.watched_txn.lock().unwrap(), *self.chain_source.watched_txn.lock().unwrap());
-            assert_eq!(*chain_source.watched_outputs.lock().unwrap(), *self.chain_source.watched_outputs.lock().unwrap());
+            assert_eq!(*chain_source.watched_txn.unsafe_well_ordered_double_lock_self(), *self.chain_source.watched_txn.unsafe_well_ordered_double_lock_self());
+            assert_eq!(*chain_source.watched_outputs.unsafe_well_ordered_double_lock_self(), *self.chain_source.watched_outputs.unsafe_well_ordered_double_lock_self());
         }
     }
 }

lightning/src/ln/functional_tests.rs

Lines changed: 21 additions & 18 deletions

@@ -8157,12 +8157,13 @@ fn test_update_err_monitor_lockdown() {
     let logger = test_utils::TestLogger::with_id(format!("node {}", 0));
     let persister = test_utils::TestPersister::new();
     let watchtower = {
-        let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
-        let mut w = test_utils::TestVecWriter(Vec::new());
-        monitor.write(&mut w).unwrap();
-        let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
-            &mut io::Cursor::new(&w.0), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
-        assert!(new_monitor == *monitor);
+        let new_monitor = {
+            let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
+            let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
+                &mut io::Cursor::new(&monitor.encode()), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
+            assert!(new_monitor == *monitor);
+            new_monitor
+        };
         let watchtower = test_utils::TestChainMonitor::new(Some(&chain_source), &chanmon_cfgs[0].tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
         assert_eq!(watchtower.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
         watchtower
@@ -8224,12 +8225,13 @@ fn test_concurrent_monitor_claim() {
     let logger = test_utils::TestLogger::with_id(format!("node {}", "Alice"));
     let persister = test_utils::TestPersister::new();
     let watchtower_alice = {
-        let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
-        let mut w = test_utils::TestVecWriter(Vec::new());
-        monitor.write(&mut w).unwrap();
-        let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
-            &mut io::Cursor::new(&w.0), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
-        assert!(new_monitor == *monitor);
+        let new_monitor = {
+            let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
+            let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
+                &mut io::Cursor::new(&monitor.encode()), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
+            assert!(new_monitor == *monitor);
+            new_monitor
+        };
         let watchtower = test_utils::TestChainMonitor::new(Some(&chain_source), &chanmon_cfgs[0].tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
         assert_eq!(watchtower.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
         watchtower
@@ -8253,12 +8255,13 @@ fn test_concurrent_monitor_claim() {
     let logger = test_utils::TestLogger::with_id(format!("node {}", "Bob"));
     let persister = test_utils::TestPersister::new();
     let watchtower_bob = {
-        let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
-        let mut w = test_utils::TestVecWriter(Vec::new());
-        monitor.write(&mut w).unwrap();
-        let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
-            &mut io::Cursor::new(&w.0), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
-        assert!(new_monitor == *monitor);
+        let new_monitor = {
+            let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
+            let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
+                &mut io::Cursor::new(&monitor.encode()), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
+            assert!(new_monitor == *monitor);
+            new_monitor
+        };
         let watchtower = test_utils::TestChainMonitor::new(Some(&chain_source), &chanmon_cfgs[0].tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
         assert_eq!(watchtower.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
         watchtower

lightning/src/routing/utxo.rs

Lines changed: 5 additions & 2 deletions

@@ -26,7 +26,7 @@ use crate::util::ser::Writeable;
 use crate::prelude::*;
 
 use alloc::sync::{Arc, Weak};
-use crate::sync::Mutex;
+use crate::sync::{Mutex, LockTestExt};
 use core::ops::Deref;
 
 /// An error when accessing the chain via [`UtxoLookup`].
@@ -404,7 +404,10 @@ impl PendingChecks {
                 // lookup if we haven't gotten that far yet).
                 match Weak::upgrade(&e.get()) {
                     Some(pending_msgs) => {
-                        let pending_matches = match &pending_msgs.lock().unwrap().channel_announce {
+                        // This may be called with the mutex held on a different UtxoMessages
+                        // struct, however in that case we have a global lockorder of new messages
+                        // -> old messages, which makes this safe.
+                        let pending_matches = match &pending_msgs.unsafe_well_ordered_double_lock_self().channel_announce {
                             Some(ChannelAnnouncement::Full(pending_msg)) => Some(pending_msg) == full_msg,
                             Some(ChannelAnnouncement::Unsigned(pending_msg)) => pending_msg == msg,
                             None => {

lightning/src/sync/debug_sync.rs

Lines changed: 27 additions & 10 deletions

@@ -124,17 +124,21 @@ impl LockMetadata {
         res
     }
 
-    fn pre_lock(this: &Arc<LockMetadata>) {
+    fn pre_lock(this: &Arc<LockMetadata>, double_lock_self_allowed: bool) {
         LOCKS_HELD.with(|held| {
             // For each lock which is currently locked, check that no lock's locked-before
             // set includes the lock we're about to lock, which would imply a lockorder
             // inversion.
             for (locked_idx, locked) in held.borrow().iter() {
                 if *locked_idx == this.lock_idx {
-                    // With `feature = "backtrace"` set, we may be looking at different instances
-                    // of the same lock.
-                    debug_assert!(cfg!(feature = "backtrace"), "Tried to acquire a lock while it was held!");
+                    // Note that with `feature = "backtrace"` set, we may be looking at different
+                    // instances of the same lock. Still, doing so is quite risky, a total order
+                    // must be maintained, and doing so across a set of otherwise-identical mutexes
+                    // is fraught with issues.
+                    debug_assert!(cfg!(feature = "backtrace") && double_lock_self_allowed, "Tried to acquire a lock while it was held!");
                 }
+            }
+            for (locked_idx, locked) in held.borrow().iter() {
                 for (locked_dep_idx, _locked_dep) in locked.locked_before.lock().unwrap().iter() {
                     if *locked_dep_idx == this.lock_idx && *locked_dep_idx != locked.lock_idx {
                         #[cfg(feature = "backtrace")]
@@ -236,7 +240,7 @@ impl<T> Mutex<T> {
     }
 
     pub fn lock<'a>(&'a self) -> LockResult<MutexGuard<'a, T>> {
-        LockMetadata::pre_lock(&self.deps);
+        LockMetadata::pre_lock(&self.deps, false);
         self.inner.lock().map(|lock| MutexGuard { mutex: self, lock }).map_err(|_| ())
     }
 
@@ -249,11 +253,17 @@ impl<T> Mutex<T> {
     }
 }
 
-impl <T> LockTestExt for Mutex<T> {
+impl<'a, T: 'a> LockTestExt<'a> for Mutex<T> {
     #[inline]
     fn held_by_thread(&self) -> LockHeldState {
         LockMetadata::held_by_thread(&self.deps)
     }
+    type ExclLock = MutexGuard<'a, T>;
+    #[inline]
+    fn unsafe_well_ordered_double_lock_self(&'a self) -> MutexGuard<T> {
+        LockMetadata::pre_lock(&self.deps, true);
+        self.inner.lock().map(|lock| MutexGuard { mutex: self, lock }).unwrap()
+    }
 }
 
 pub struct RwLock<T: Sized> {
@@ -317,13 +327,14 @@ impl<T> RwLock<T> {
     pub fn read<'a>(&'a self) -> LockResult<RwLockReadGuard<'a, T>> {
         // Note that while we could be taking a recursive read lock here, Rust's `RwLock` may
         // deadlock trying to take a second read lock if another thread is waiting on the write
-        // lock. Its platform dependent (but our in-tree `FairRwLock` guarantees this behavior).
-        LockMetadata::pre_lock(&self.deps);
+        // lock. This behavior is platform dependent, but our in-tree `FairRwLock` guarantees
+        // such a deadlock.
+        LockMetadata::pre_lock(&self.deps, false);
         self.inner.read().map(|guard| RwLockReadGuard { lock: self, guard }).map_err(|_| ())
     }
 
     pub fn write<'a>(&'a self) -> LockResult<RwLockWriteGuard<'a, T>> {
-        LockMetadata::pre_lock(&self.deps);
+        LockMetadata::pre_lock(&self.deps, false);
         self.inner.write().map(|guard| RwLockWriteGuard { lock: self, guard }).map_err(|_| ())
     }
 
@@ -336,11 +347,17 @@ impl<T> RwLock<T> {
    }
 }
 
-impl <T> LockTestExt for RwLock<T> {
+impl<'a, T: 'a> LockTestExt<'a> for RwLock<T> {
     #[inline]
     fn held_by_thread(&self) -> LockHeldState {
         LockMetadata::held_by_thread(&self.deps)
     }
+    type ExclLock = RwLockWriteGuard<'a, T>;
+    #[inline]
+    fn unsafe_well_ordered_double_lock_self(&'a self) -> RwLockWriteGuard<'a, T> {
+        LockMetadata::pre_lock(&self.deps, true);
+        self.inner.write().map(|guard| RwLockWriteGuard { lock: self, guard }).unwrap()
+    }
 }
 
 pub type FairRwLock<T> = RwLock<T>;
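
To see what the `double_lock_self_allowed` flag changes in practice, here is an illustrative crate-internal test sketch (not part of this commit). Both mutexes are created at the same call site, which, with `feature = "backtrace"`, is the "two instances of the same lock" situation the tracker flags: holding one and calling plain `lock()` on the other may trip the `pre_lock(.., false)` assertion, while the verbose method calls `pre_lock(.., true)`. The locks here are purely thread-local, so any fixed acquisition order is trivially a total order.

#[cfg(test)]
mod double_lock_example {
    use crate::sync::{Mutex, LockTestExt};

    #[test]
    fn ordered_double_lock() {
        // Two instances constructed at one call site; assumed to share lock metadata
        // when the `backtrace` feature is enabled.
        let mutexes: Vec<Mutex<u8>> = (0..2u8).map(Mutex::new).collect();
        let g0 = mutexes[0].unsafe_well_ordered_double_lock_self();
        let g1 = mutexes[1].unsafe_well_ordered_double_lock_self();
        assert_ne!(*g0, *g1);
    }
}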

lightning/src/sync/fairrwlock.rs

Lines changed: 6 additions & 1 deletion

@@ -50,10 +50,15 @@ impl<T> FairRwLock<T> {
     }
 }
 
-impl<T> LockTestExt for FairRwLock<T> {
+impl<'a, T: 'a> LockTestExt<'a> for FairRwLock<T> {
     #[inline]
     fn held_by_thread(&self) -> LockHeldState {
         // fairrwlock is only built in non-test modes, so we should never support tests.
         LockHeldState::Unsupported
     }
+    type ExclLock = RwLockWriteGuard<'a, T>;
+    #[inline]
+    fn unsafe_well_ordered_double_lock_self(&'a self) -> RwLockWriteGuard<'a, T> {
+        self.write().unwrap()
+    }
 }

lightning/src/sync/mod.rs

Lines changed: 18 additions & 3 deletions

@@ -7,8 +7,17 @@ pub(crate) enum LockHeldState {
     Unsupported,
 }
 
-pub(crate) trait LockTestExt {
+pub(crate) trait LockTestExt<'a> {
     fn held_by_thread(&self) -> LockHeldState;
+    type ExclLock;
+    /// If two instances of the same mutex are being taken at the same time, it's very easy to have
+    /// a lockorder inversion and risk deadlock. Thus, we default to disabling such locks.
+    ///
+    /// However, sometimes they cannot be avoided. In such cases, this method exists to take a
+    /// mutex while avoiding a test failure. It is deliberately verbose and includes the term
+    /// "unsafe" to indicate that special care needs to be taken to ensure no deadlocks are
+    /// possible.
+    fn unsafe_well_ordered_double_lock_self(&'a self) -> Self::ExclLock;
 }
 
 #[cfg(all(feature = "std", not(feature = "_bench_unstable"), test))]
@@ -27,13 +36,19 @@ pub use {std::sync::{Arc, Mutex, Condvar, MutexGuard, RwLock, RwLockReadGuard, R
 #[cfg(all(feature = "std", any(feature = "_bench_unstable", not(test))))]
 mod ext_impl {
     use super::*;
-    impl<T> LockTestExt for Mutex<T> {
+    impl<'a, T: 'a> LockTestExt<'a> for Mutex<T> {
         #[inline]
         fn held_by_thread(&self) -> LockHeldState { LockHeldState::Unsupported }
+        type ExclLock = MutexGuard<'a, T>;
+        #[inline]
+        fn unsafe_well_ordered_double_lock_self(&'a self) -> MutexGuard<T> { self.lock().unwrap() }
     }
-    impl<T> LockTestExt for RwLock<T> {
+    impl<'a, T: 'a> LockTestExt<'a> for RwLock<T> {
         #[inline]
         fn held_by_thread(&self) -> LockHeldState { LockHeldState::Unsupported }
+        type ExclLock = RwLockWriteGuard<'a, T>;
+        #[inline]
+        fn unsafe_well_ordered_double_lock_self(&'a self) -> RwLockWriteGuard<T> { self.write().unwrap() }
     }
 }

lightning/src/sync/nostd_sync.rs

Lines changed: 8 additions & 2 deletions

@@ -62,12 +62,15 @@ impl<T> Mutex<T> {
     }
 }
 
-impl<T> LockTestExt for Mutex<T> {
+impl<'a, T: 'a> LockTestExt<'a> for Mutex<T> {
     #[inline]
     fn held_by_thread(&self) -> LockHeldState {
         if self.lock().is_err() { return LockHeldState::HeldByThread; }
         else { return LockHeldState::NotHeldByThread; }
     }
+    type ExclLock = MutexGuard<'a, T>;
+    #[inline]
+    fn unsafe_well_ordered_double_lock_self(&'a self) -> MutexGuard<T> { self.lock().unwrap() }
 }
 
 pub struct RwLock<T: ?Sized> {
@@ -125,12 +128,15 @@ impl<T> RwLock<T> {
     }
 }
 
-impl<T> LockTestExt for RwLock<T> {
+impl<'a, T: 'a> LockTestExt<'a> for RwLock<T> {
     #[inline]
     fn held_by_thread(&self) -> LockHeldState {
         if self.write().is_err() { return LockHeldState::HeldByThread; }
         else { return LockHeldState::NotHeldByThread; }
     }
+    type ExclLock = RwLockWriteGuard<'a, T>;
+    #[inline]
+    fn unsafe_well_ordered_double_lock_self(&'a self) -> RwLockWriteGuard<T> { self.write().unwrap() }
 }
 
 pub type FairRwLock<T> = RwLock<T>;
