Commit a1bb7ca

Disallow taking two instances of the same mutex at the same time
Taking two instances of the same mutex may be totally fine, but it requires a total lockorder that we cannot (trivially) check. Thus, it's generally unsafe to do if we can avoid it. To discourage doing this, here we default to panicking on such locks in our lockorder tests, with a separate lock function added that is clearly labeled "unsafe" to allow doing so when we can guarantee a total lockorder. This requires adapting a number of sites to the new API, including fixing a bug this turned up in `ChannelMonitor`'s `PartialEq`, where no lockorder was guaranteed.
1 parent 19bc412 · commit a1bb7ca

9 files changed (+102 -48 lines)
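
To illustrate the hazard this commit guards against, independent of any LDK types: two threads that take two instances of the same mutex type in opposite orders can deadlock, which is why a total lockorder (or an explicit, clearly-labeled opt-in) is required. A minimal, self-contained sketch in plain Rust follows; the names are illustrative only, and `try_lock` is used so the demo terminates instead of hanging:

use std::sync::{Arc, Mutex};
use std::thread;
use std::time::Duration;

fn main() {
	// Two instances of the same mutex type.
	let a = Arc::new(Mutex::new(0u32));
	let b = Arc::new(Mutex::new(0u32));

	let (a2, b2) = (Arc::clone(&a), Arc::clone(&b));
	let other = thread::spawn(move || {
		// This thread takes b first, then tries a...
		let _guard_b = b2.lock().unwrap();
		thread::sleep(Duration::from_millis(50));
		a2.try_lock().is_ok()
	});

	// ...while this thread takes a first, then tries b: a lockorder inversion.
	let _guard_a = a.lock().unwrap();
	thread::sleep(Duration::from_millis(50));
	let got_b = b.try_lock().is_ok();
	let got_a = other.join().unwrap();

	// Typically both `try_lock`s fail here; with blocking `lock()` calls the same
	// interleaving would leave both threads waiting on each other forever.
	println!("main got b: {got_b}, other thread got a: {got_a}");
}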

lightning/src/chain/channelmonitor.rs (+8 -4)

@@ -60,7 +60,7 @@ use core::{cmp, mem};
 use crate::io::{self, Error};
 use core::convert::TryInto;
 use core::ops::Deref;
-use crate::sync::Mutex;
+use crate::sync::{Mutex, LockTestExt};
 
 /// An update generated by the underlying channel itself which contains some new information the
 /// [`ChannelMonitor`] should be made aware of.
@@ -855,9 +855,13 @@ pub type TransactionOutputs = (Txid, Vec<(u32, TxOut)>);
 
 impl<Signer: WriteableEcdsaChannelSigner> PartialEq for ChannelMonitor<Signer> where Signer: PartialEq {
 	fn eq(&self, other: &Self) -> bool {
-		let inner = self.inner.lock().unwrap();
-		let other = other.inner.lock().unwrap();
-		inner.eq(&other)
+		// We need some kind of total lockorder. Absent a better idea, we sort by position in
+		// memory and take locks in that order (assuming that we can't move within memory while a
+		// lock is held).
+		let ord = ((self as *const _) as usize) < ((other as *const _) as usize);
+		let a = if ord { self.inner.unsafe_well_ordered_double_lock_self() } else { other.inner.unsafe_well_ordered_double_lock_self() };
+		let b = if ord { other.inner.unsafe_well_ordered_double_lock_self() } else { self.inner.unsafe_well_ordered_double_lock_self() };
+		a.eq(&b)
 	}
 }
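
The new `PartialEq` body above establishes a total order by comparing the two objects' memory addresses before locking either one. A standalone sketch of the same idiom, using plain `std::sync::Mutex` and a hypothetical `Account` type rather than LDK's instrumented locks:

use std::sync::{Mutex, MutexGuard};

// Hypothetical type, for illustration only.
struct Account { balance: u64 }

// Lock two instances of the same mutex type in a stable, address-based total order:
// whichever instance sits at the lower address is always locked first, so concurrent
// callers passing the arguments in either order agree on the lockorder. As in the
// commit, this assumes the mutexes cannot move in memory while locked, which holds
// here because we hold references to them for the whole call.
fn lock_both<'a>(x: &'a Mutex<Account>, y: &'a Mutex<Account>)
	-> (MutexGuard<'a, Account>, MutexGuard<'a, Account>) {
	if (x as *const _ as usize) < (y as *const _ as usize) {
		let guard_x = x.lock().unwrap();
		let guard_y = y.lock().unwrap();
		(guard_x, guard_y)
	} else {
		let guard_y = y.lock().unwrap();
		let guard_x = x.lock().unwrap();
		(guard_x, guard_y)
	}
}

fn main() {
	let a = Mutex::new(Account { balance: 10 });
	let b = Mutex::new(Account { balance: 20 });
	let (ga, gb) = lock_both(&a, &b);
	assert_eq!(ga.balance + gb.balance, 30);
}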

lightning/src/ln/chanmon_update_fail_tests.rs (+7 -6)

@@ -108,12 +108,13 @@ fn test_monitor_and_persister_update_fail() {
 		blocks: Arc::new(Mutex::new(vec![(genesis_block(Network::Testnet), 200); 200])),
 	};
 	let chain_mon = {
-		let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
-		let mut w = test_utils::TestVecWriter(Vec::new());
-		monitor.write(&mut w).unwrap();
-		let new_monitor = <(BlockHash, ChannelMonitor<EnforcingSigner>)>::read(
-			&mut io::Cursor::new(&w.0), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
-		assert!(new_monitor == *monitor);
+		let new_monitor = {
+			let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
+			let new_monitor = <(BlockHash, ChannelMonitor<EnforcingSigner>)>::read(
+				&mut io::Cursor::new(&monitor.encode()), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
+			assert!(new_monitor == *monitor);
+			new_monitor
+		};
 		let chain_mon = test_utils::TestChainMonitor::new(Some(&chain_source), &tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
 		assert_eq!(chain_mon.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
 		chain_mon
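
The test now wraps the borrow of the original monitor in an inner block so its guard is dropped before the freshly-deserialized copy is handed to a second `TestChainMonitor`, which takes locks of the same type. A minimal sketch of that scoping pattern, with generic `Mutex`es standing in for the monitor locks:

use std::sync::Mutex;

fn main() {
	// Stand-ins for two monitor sets guarded by the same lock type.
	let original_set = Mutex::new(vec![1u8, 2, 3]);
	let new_set = Mutex::new(Vec::<u8>::new());

	let copied = {
		// The guard on the original set lives only inside this block...
		let original = original_set.lock().unwrap();
		original.clone()
	}; // ...and is dropped here, before we take the same lock type again below.

	new_set.lock().unwrap().extend_from_slice(&copied);
	assert_eq!(*new_set.lock().unwrap(), vec![1, 2, 3]);
}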

lightning/src/ln/channelmanager.rs (+10 -3)

@@ -6930,7 +6930,10 @@ where
 		let mut monitor_update_blocked_actions_per_peer = None;
 		let mut peer_states = Vec::new();
 		for (_, peer_state_mutex) in per_peer_state.iter() {
-			peer_states.push(peer_state_mutex.lock().unwrap());
+			// Because we're holding the owning `per_peer_state` write lock here there's no chance
+			// of a lockorder violation deadlock - no other thread can be holding any
+			// per_peer_state lock at all.
+			peer_states.push(peer_state_mutex.unsafe_well_ordered_double_lock_self());
 		}
 
 		(serializable_peer_count).write(writer)?;
@@ -8275,18 +8278,20 @@ mod tests {
 			let nodes_0_lock = nodes[0].node.id_to_peer.lock().unwrap();
 			assert_eq!(nodes_0_lock.len(), 1);
 			assert!(nodes_0_lock.contains_key(channel_id));
-
-			assert_eq!(nodes[1].node.id_to_peer.lock().unwrap().len(), 0);
 		}
 
+		assert_eq!(nodes[1].node.id_to_peer.lock().unwrap().len(), 0);
+
 		let funding_created_msg = get_event_msg!(nodes[0], MessageSendEvent::SendFundingCreated, nodes[1].node.get_our_node_id());
 
 		nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &funding_created_msg);
 		{
 			let nodes_0_lock = nodes[0].node.id_to_peer.lock().unwrap();
 			assert_eq!(nodes_0_lock.len(), 1);
 			assert!(nodes_0_lock.contains_key(channel_id));
+		}
 
+		{
 			// Assert that `nodes[1]`'s `id_to_peer` map is populated with the channel as soon as
 			// as it has the funding transaction.
 			let nodes_1_lock = nodes[1].node.id_to_peer.lock().unwrap();
@@ -8316,7 +8321,9 @@ mod tests {
 			let nodes_0_lock = nodes[0].node.id_to_peer.lock().unwrap();
 			assert_eq!(nodes_0_lock.len(), 1);
 			assert!(nodes_0_lock.contains_key(channel_id));
+		}
 
+		{
 			// At this stage, `nodes[1]` has proposed a fee for the closing transaction in the
 			// `handle_closing_signed` call above. As `nodes[1]` has not yet received the signature
 			// from `nodes[0]` for the closing transaction with the proposed fee, the channel is
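
The comment in the first hunk carries the key safety argument: the per-peer mutexes can only be reached through the outer `per_peer_state` lock, so holding that outer lock in write mode excludes every other locker of the inner mutexes. A standalone sketch of that pattern, using a hypothetical `PeerState` type and plain `std` locks rather than LDK's wrappers:

use std::collections::HashMap;
use std::sync::{Mutex, RwLock};

// Hypothetical per-peer state, for illustration only.
struct PeerState { msgs_queued: usize }

// Assuming every reader/writer of an inner per-peer mutex first takes the outer lock
// (as ChannelManager does), once we hold the outer lock in *write* mode no other
// thread can be holding or acquiring any inner lock, so collecting guards for all of
// them in arbitrary order cannot deadlock.
fn total_queued(per_peer: &RwLock<HashMap<u64, Mutex<PeerState>>>) -> usize {
	let peers = per_peer.write().unwrap();
	let mut guards = Vec::new();
	for peer in peers.values() {
		// Many instances of the same mutex type held at once - safe only under the
		// exclusion provided by the outer write lock.
		guards.push(peer.lock().unwrap());
	}
	guards.iter().map(|g| g.msgs_queued).sum()
}

fn main() {
	let per_peer = RwLock::new(HashMap::from([
		(1u64, Mutex::new(PeerState { msgs_queued: 2 })),
		(2u64, Mutex::new(PeerState { msgs_queued: 3 })),
	]));
	assert_eq!(total_queued(&per_peer), 5);
}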

lightning/src/ln/functional_test_utils.rs (+2 -2)

@@ -466,8 +466,8 @@ impl<'a, 'b, 'c> Drop for Node<'a, 'b, 'c> {
 					panic!();
 				}
 			}
-			assert_eq!(*chain_source.watched_txn.lock().unwrap(), *self.chain_source.watched_txn.lock().unwrap());
-			assert_eq!(*chain_source.watched_outputs.lock().unwrap(), *self.chain_source.watched_outputs.lock().unwrap());
+			assert_eq!(*chain_source.watched_txn.unsafe_well_ordered_double_lock_self(), *self.chain_source.watched_txn.unsafe_well_ordered_double_lock_self());
+			assert_eq!(*chain_source.watched_outputs.unsafe_well_ordered_double_lock_self(), *self.chain_source.watched_outputs.unsafe_well_ordered_double_lock_self());
 		}
 	}
 }

lightning/src/ln/functional_tests.rs (+21 -18)

@@ -8160,12 +8160,13 @@ fn test_update_err_monitor_lockdown() {
 	let logger = test_utils::TestLogger::with_id(format!("node {}", 0));
 	let persister = test_utils::TestPersister::new();
 	let watchtower = {
-		let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
-		let mut w = test_utils::TestVecWriter(Vec::new());
-		monitor.write(&mut w).unwrap();
-		let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
-			&mut io::Cursor::new(&w.0), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
-		assert!(new_monitor == *monitor);
+		let new_monitor = {
+			let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
+			let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
+				&mut io::Cursor::new(&monitor.encode()), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
+			assert!(new_monitor == *monitor);
+			new_monitor
+		};
 		let watchtower = test_utils::TestChainMonitor::new(Some(&chain_source), &chanmon_cfgs[0].tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
 		assert_eq!(watchtower.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
 		watchtower
@@ -8227,12 +8228,13 @@ fn test_concurrent_monitor_claim() {
 	let logger = test_utils::TestLogger::with_id(format!("node {}", "Alice"));
 	let persister = test_utils::TestPersister::new();
 	let watchtower_alice = {
-		let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
-		let mut w = test_utils::TestVecWriter(Vec::new());
-		monitor.write(&mut w).unwrap();
-		let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
-			&mut io::Cursor::new(&w.0), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
-		assert!(new_monitor == *monitor);
+		let new_monitor = {
+			let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
+			let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
+				&mut io::Cursor::new(&monitor.encode()), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
+			assert!(new_monitor == *monitor);
+			new_monitor
+		};
 		let watchtower = test_utils::TestChainMonitor::new(Some(&chain_source), &chanmon_cfgs[0].tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
 		assert_eq!(watchtower.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
 		watchtower
@@ -8256,12 +8258,13 @@ fn test_concurrent_monitor_claim() {
 	let logger = test_utils::TestLogger::with_id(format!("node {}", "Bob"));
 	let persister = test_utils::TestPersister::new();
 	let watchtower_bob = {
-		let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
-		let mut w = test_utils::TestVecWriter(Vec::new());
-		monitor.write(&mut w).unwrap();
-		let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
-			&mut io::Cursor::new(&w.0), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
-		assert!(new_monitor == *monitor);
+		let new_monitor = {
+			let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
+			let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
+				&mut io::Cursor::new(&monitor.encode()), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
+			assert!(new_monitor == *monitor);
+			new_monitor
+		};
 		let watchtower = test_utils::TestChainMonitor::new(Some(&chain_source), &chanmon_cfgs[0].tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
 		assert_eq!(watchtower.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
 		watchtower

lightning/src/routing/utxo.rs (+5 -2)

@@ -26,7 +26,7 @@ use crate::util::ser::Writeable;
 use crate::prelude::*;
 
 use alloc::sync::{Arc, Weak};
-use crate::sync::Mutex;
+use crate::sync::{Mutex, LockTestExt};
 use core::ops::Deref;
 
 /// An error when accessing the chain via [`UtxoLookup`].
@@ -404,7 +404,10 @@ impl PendingChecks {
 					// lookup if we haven't gotten that far yet).
 					match Weak::upgrade(&e.get()) {
 						Some(pending_msgs) => {
-							let pending_matches = match &pending_msgs.lock().unwrap().channel_announce {
+							// This may be called with the mutex held on a different UtxoMessages
+							// struct, however in that case we have a global lockorder of new messages
+							// -> old messages, which makes this safe.
+							let pending_matches = match &pending_msgs.unsafe_well_ordered_double_lock_self().channel_announce {
 								Some(ChannelAnnouncement::Full(pending_msg)) => Some(pending_msg) == full_msg,
 								Some(ChannelAnnouncement::Unsigned(pending_msg)) => pending_msg == msg,
 								None => {

lightning/src/sync/debug_sync.rs (+25 -9)

@@ -124,17 +124,21 @@ impl LockMetadata {
 		res
 	}
 
-	fn pre_lock(this: &Arc<LockMetadata>) {
+	fn pre_lock(this: &Arc<LockMetadata>, double_lock_self_allowed: bool) {
 		LOCKS_HELD.with(|held| {
 			// For each lock which is currently locked, check that no lock's locked-before
 			// set includes the lock we're about to lock, which would imply a lockorder
 			// inversion.
 			for (locked_idx, locked) in held.borrow().iter() {
 				if *locked_idx == this.lock_idx {
-					// With `feature = "backtrace"` set, we may be looking at different instances
-					// of the same lock.
-					debug_assert!(cfg!(feature = "backtrace"), "Tried to acquire a lock while it was held!");
+					// Note that with `feature = "backtrace"` set, we may be looking at different
+					// instances of the same lock. Still, doing so is quite risky, a total order
+					// must be maintained, and doing so across a set of otherwise-identical mutexes
+					// is fraught with issues.
+					debug_assert!(cfg!(feature = "backtrace") && double_lock_self_allowed, "Tried to acquire a lock while it was held!");
 				}
+			}
+			for (locked_idx, locked) in held.borrow().iter() {
 				for (locked_dep_idx, _locked_dep) in locked.locked_before.lock().unwrap().iter() {
 					if *locked_dep_idx == this.lock_idx && *locked_dep_idx != locked.lock_idx {
 						#[cfg(feature = "backtrace")]
@@ -236,7 +240,7 @@ impl<T> Mutex<T> {
 	}
 
 	pub fn lock<'a>(&'a self) -> LockResult<MutexGuard<'a, T>> {
-		LockMetadata::pre_lock(&self.deps);
+		LockMetadata::pre_lock(&self.deps, false);
 		self.inner.lock().map(|lock| MutexGuard { mutex: self, lock }).map_err(|_| ())
 	}
 
@@ -249,11 +253,17 @@ impl<T> Mutex<T> {
 	}
 }
 
-impl <T> LockTestExt for Mutex<T> {
+impl<'a, T: 'a> LockTestExt<'a> for Mutex<T> {
 	#[inline]
 	fn held_by_thread(&self) -> LockHeldState {
 		LockMetadata::held_by_thread(&self.deps)
 	}
+	type ExclLock = MutexGuard<'a, T>;
+	#[inline]
+	fn unsafe_well_ordered_double_lock_self(&'a self) -> MutexGuard<T> {
+		LockMetadata::pre_lock(&self.deps, true);
+		self.inner.lock().map(|lock| MutexGuard { mutex: self, lock }).unwrap()
+	}
 }
 
 pub struct RwLock<T: Sized> {
@@ -318,12 +328,12 @@ impl<T> RwLock<T> {
 		// Note that while we could be taking a recursive read lock here, Rust's `RwLock` may
 		// deadlock trying to take a second read lock if another thread is waiting on the write
 		// lock. Its platform dependent (but our in-tree `FairRwLock` guarantees this behavior).
-		LockMetadata::pre_lock(&self.deps);
+		LockMetadata::pre_lock(&self.deps, false);
 		self.inner.read().map(|guard| RwLockReadGuard { lock: self, guard }).map_err(|_| ())
 	}
 
 	pub fn write<'a>(&'a self) -> LockResult<RwLockWriteGuard<'a, T>> {
-		LockMetadata::pre_lock(&self.deps);
+		LockMetadata::pre_lock(&self.deps, false);
 		self.inner.write().map(|guard| RwLockWriteGuard { lock: self, guard }).map_err(|_| ())
 	}
 
@@ -336,11 +346,17 @@ impl<T> RwLock<T> {
 	}
 }
 
-impl <T> LockTestExt for RwLock<T> {
+impl<'a, T: 'a> LockTestExt<'a> for RwLock<T> {
 	#[inline]
 	fn held_by_thread(&self) -> LockHeldState {
		LockMetadata::held_by_thread(&self.deps)
 	}
+	type ExclLock = RwLockWriteGuard<'a, T>;
+	#[inline]
+	fn unsafe_well_ordered_double_lock_self(&'a self) -> RwLockWriteGuard<'a, T> {
+		LockMetadata::pre_lock(&self.deps, true);
+		self.inner.write().map(|guard| RwLockWriteGuard { lock: self, guard }).unwrap()
+	}
 }
 
 pub type FairRwLock<T> = RwLock<T>;
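
For intuition, here is a heavily simplified model of the check `pre_lock` now performs; the real implementation's per-construction-site lock identities, backtraces, lockorder-dependency tracking, and the `backtrace`-feature subtleties are all glossed over:

use std::collections::HashSet;

// Toy tracker: `lock_idx` stands in for LockMetadata's per-lock identity.
struct LockTracker { held: HashSet<usize> }

impl LockTracker {
	fn pre_lock(&mut self, lock_idx: usize, double_lock_self_allowed: bool) {
		// Taking a lock identity that is already held is only tolerated when the caller
		// went through the deliberately-verbose "unsafe" double-lock entry point.
		if self.held.contains(&lock_idx) {
			assert!(double_lock_self_allowed, "Tried to acquire a lock while it was held!");
		}
		self.held.insert(lock_idx);
	}
}

fn main() {
	let mut tracker = LockTracker { held: HashSet::new() };
	tracker.pre_lock(7, false); // first acquisition via plain lock(): fine
	tracker.pre_lock(7, true);  // unsafe_well_ordered_double_lock_self(): allowed
	// tracker.pre_lock(7, false); // a second plain lock() would panic here
}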

lightning/src/sync/fairrwlock.rs (+6 -1)

@@ -50,10 +50,15 @@ impl<T> FairRwLock<T> {
 	}
 }
 
-impl<T> LockTestExt for FairRwLock<T> {
+impl<'a, T: 'a> LockTestExt<'a> for FairRwLock<T> {
 	#[inline]
 	fn held_by_thread(&self) -> LockHeldState {
 		// fairrwlock is only built in non-test modes, so we should never support tests.
 		LockHeldState::Unsupported
 	}
+	type ExclLock = RwLockWriteGuard<'a, T>;
+	#[inline]
+	fn unsafe_well_ordered_double_lock_self(&'a self) -> RwLockWriteGuard<'a, T> {
+		self.write().unwrap()
+	}
 }

lightning/src/sync/mod.rs (+18 -3)

@@ -7,8 +7,17 @@ pub(crate) enum LockHeldState {
 	Unsupported,
 }
 
-pub(crate) trait LockTestExt {
+pub(crate) trait LockTestExt<'a> {
 	fn held_by_thread(&self) -> LockHeldState;
+	type ExclLock;
+	/// If two instances of the same mutex are being taken at the same time, it's very easy to have
+	/// a lockorder inversion and risk deadlock. Thus, we default to disabling such locks.
+	///
+	/// However, sometimes they cannot be avoided. In such cases, this method exists to take a
+	/// mutex while avoiding a test failure. It is deliberately verbose and includes the term
+	/// "unsafe" to indicate that special care needs to be taken to ensure no deadlocks are
+	/// possible.
+	fn unsafe_well_ordered_double_lock_self(&'a self) -> Self::ExclLock;
 }
 
 #[cfg(all(feature = "std", not(feature = "_bench_unstable"), test))]
@@ -27,13 +36,19 @@ pub use {std::sync::{Arc, Mutex, Condvar, MutexGuard, RwLock, RwLockReadGuard, R
 #[cfg(all(feature = "std", any(feature = "_bench_unstable", not(test))))]
 mod ext_impl {
 	use super::*;
-	impl<T> LockTestExt for Mutex<T> {
+	impl<'a, T: 'a> LockTestExt<'a> for Mutex<T> {
 		#[inline]
 		fn held_by_thread(&self) -> LockHeldState { LockHeldState::Unsupported }
+		type ExclLock = MutexGuard<'a, T>;
+		#[inline]
+		fn unsafe_well_ordered_double_lock_self(&'a self) -> MutexGuard<T> { self.lock().unwrap() }
 	}
-	impl<T> LockTestExt for RwLock<T> {
+	impl<'a, T: 'a> LockTestExt<'a> for RwLock<T> {
 		#[inline]
 		fn held_by_thread(&self) -> LockHeldState { LockHeldState::Unsupported }
+		type ExclLock = RwLockWriteGuard<'a, T>;
+		#[inline]
+		fn unsafe_well_ordered_double_lock_self(&'a self) -> RwLockWriteGuard<T> { self.write().unwrap() }
 	}
 }
