Commit 3ceeb22

Expand lockorder testing to look at mutexes, not specific instances
Our existing lockorder inversion checks look at specific instances of mutexes rather than the general mutex itself. This changes that behavior to look at the instruction pointer at which a mutex was created and treat all mutexes which were created at the same location as equivalent. This allows us to detect lockorder inversions which occur across tests, though it does substantially reduce parallelism during test runs.
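To illustrate the approach (a minimal sketch, not the patch itself; the actual changes are in the diff below): lock metadata is keyed by the source location at which a mutex was constructed, so every mutex created at the same call site shares one `LockMetadata` and therefore one lockorder history. Here `metadata_for_site` and the hard-coded location strings are hypothetical stand-ins for the backtrace-based `get_construction_location` lookup in the real code, and a safe `Mutex<Option<..>>` replaces the patch's `static mut` registry initialised via `Once`.

use std::collections::HashMap;
use std::sync::{Arc, Mutex};

// Simplified stand-in for the per-lock metadata tracked by debug_sync.
struct LockMetadata {
	lock_idx: u64,
}

// Global registry mapping a construction site to its shared metadata. The real
// patch uses a `static mut` guarded by `Once`; a safe `Mutex<Option<..>>` is
// enough for this sketch.
static LOCKS: Mutex<Option<HashMap<String, Arc<LockMetadata>>>> = Mutex::new(None);

// Return the metadata for a construction site, creating it on first use. The
// `construction_site` string stands in for what `get_construction_location`
// derives from a backtrace (file, line and column of the `Mutex::new` caller).
fn metadata_for_site(construction_site: &str, next_idx: u64) -> Arc<LockMetadata> {
	let mut locks = LOCKS.lock().unwrap();
	let map = locks.get_or_insert_with(HashMap::new);
	let entry = map.entry(construction_site.to_owned())
		.or_insert_with(|| Arc::new(LockMetadata { lock_idx: next_idx }));
	Arc::clone(entry)
}

fn main() {
	// Two mutexes constructed at the same source location share one metadata
	// entry, so lockorder edges recorded in one test apply to all of them.
	let a = metadata_for_site("lightning/src/debug_sync.rs:42:9", 0);
	let b = metadata_for_site("lightning/src/debug_sync.rs:42:9", 1);
	assert!(Arc::ptr_eq(&a, &b));
	assert_eq!(a.lock_idx, 0);
}

Because the key is the call site rather than the instance, lockorder edges recorded in one test carry over to any other test that constructs a mutex at the same location, which is what makes cross-test inversions detectable.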
1 parent ab49a0e commit 3ceeb22

File tree

lightning/src/debug_sync.rs

Lines changed: 187 additions & 100 deletions
@@ -6,7 +6,6 @@ use std::collections::HashSet;
 use std::cell::RefCell;
 
 use std::sync::atomic::{AtomicUsize, Ordering};
-
 use std::sync::Mutex as StdMutex;
 use std::sync::MutexGuard as StdMutexGuard;
 use std::sync::RwLock as StdRwLock;
@@ -15,7 +14,12 @@ use std::sync::RwLockWriteGuard as StdRwLockWriteGuard;
 use std::sync::Condvar as StdCondvar;
 
 #[cfg(feature = "backtrace")]
-use backtrace::Backtrace;
+use {prelude::{HashMap, hash_map}, backtrace::Backtrace, std::sync::Once};
+
+#[cfg(not(feature = "backtrace"))]
+struct Backtrace{}
+#[cfg(not(feature = "backtrace"))]
+impl Backtrace { fn new() -> Backtrace { Backtrace {} } }
 
 pub type LockResult<Guard> = Result<Guard, ()>;
 
@@ -48,13 +52,17 @@ thread_local! {
 }
 static LOCK_IDX: AtomicUsize = AtomicUsize::new(0);
 
+#[cfg(feature = "backtrace")]
+static mut LOCKS: Option<StdMutex<HashMap<String, Arc<LockMetadata>>>> = None;
+#[cfg(feature = "backtrace")]
+static LOCKS_INIT: Once = Once::new();
+
 /// Metadata about a single lock, by id, the set of things locked-before it, and the backtrace of
 /// when the Mutex itself was constructed.
 struct LockMetadata {
 	lock_idx: u64,
-	locked_before: StdMutex<HashSet<Arc<LockMetadata>>>,
-	#[cfg(feature = "backtrace")]
-	lock_construction_bt: Backtrace,
+	locked_before: StdMutex<HashSet<LockDep>>,
+	_lock_construction_bt: Backtrace,
 }
 impl PartialEq for LockMetadata {
 	fn eq(&self, o: &LockMetadata) -> bool { self.lock_idx == o.lock_idx }
@@ -64,14 +72,73 @@ impl std::hash::Hash for LockMetadata {
 	fn hash<H: std::hash::Hasher>(&self, hasher: &mut H) { hasher.write_u64(self.lock_idx); }
 }
 
+struct LockDep {
+	lock: Arc<LockMetadata>,
+	lockdep_trace: Option<Backtrace>,
+}
+impl LockDep {
+	/// Note that `Backtrace::new()` is rather expensive so we rely on the caller to fill in the
+	/// `lockdep_backtrace` field after ensuring we need it.
+	fn new_without_bt(lock: &Arc<LockMetadata>) -> Self {
+		Self { lock: Arc::clone(lock), lockdep_trace: None }
+	}
+}
+impl PartialEq for LockDep {
+	fn eq(&self, o: &LockDep) -> bool { self.lock.lock_idx == o.lock.lock_idx }
+}
+impl Eq for LockDep {}
+impl std::hash::Hash for LockDep {
+	fn hash<H: std::hash::Hasher>(&self, hasher: &mut H) { hasher.write_u64(self.lock.lock_idx); }
+}
+
+#[cfg(feature = "backtrace")]
+fn get_construction_location(backtrace: &Backtrace) -> String {
+	// Find the first frame which is after `debug_sync` (or which is in our tests) and use
+	// that as the mutex construction site. Note that the first few frames may be in
+	// `backtrace`, so we have to ignore those.
+	let sync_mutex_constr_regex = regex::Regex::new(r"lightning.*debug_sync.*new").unwrap();
+	let mut found_debug_sync = false;
+	for frame in backtrace.frames() {
+		for symbol in frame.symbols() {
+			let symbol_name = symbol.name().unwrap().as_str().unwrap();
+			if !sync_mutex_constr_regex.is_match(symbol_name) {
+				if found_debug_sync {
+					if let Some(col) = symbol.colno() {
+						return format!("{}:{}:{}", symbol.filename().unwrap().display(), symbol.lineno().unwrap(), col);
+					} else {
+						// Windows debug symbols don't support column numbers, so fall back to
+						// line numbers only if no `colno` is available
+						return format!("{}:{}", symbol.filename().unwrap().display(), symbol.lineno().unwrap());
+					}
+				}
+			} else { found_debug_sync = true; }
+		}
+	}
+	panic!("Couldn't find mutex construction callsite");
+}
+
 impl LockMetadata {
-	fn new() -> LockMetadata {
-		LockMetadata {
+	fn new() -> Arc<LockMetadata> {
+		let backtrace = Backtrace::new();
+		let lock_idx = LOCK_IDX.fetch_add(1, Ordering::Relaxed) as u64;
+
+		let res = Arc::new(LockMetadata {
 			locked_before: StdMutex::new(HashSet::new()),
-			lock_idx: LOCK_IDX.fetch_add(1, Ordering::Relaxed) as u64,
-			#[cfg(feature = "backtrace")]
-			lock_construction_bt: Backtrace::new(),
+			lock_idx,
+			_lock_construction_bt: backtrace,
+		});
+
+		#[cfg(feature = "backtrace")]
+		{
+			let lock_constr_location = get_construction_location(&res._lock_construction_bt);
+			LOCKS_INIT.call_once(|| { unsafe { LOCKS = Some(StdMutex::new(HashMap::new())); } });
+			let mut locks = unsafe { LOCKS.as_ref() }.unwrap().lock().unwrap();
+			match locks.entry(lock_constr_location) {
+				hash_map::Entry::Occupied(e) => return Arc::clone(e.get()),
+				hash_map::Entry::Vacant(e) => { e.insert(Arc::clone(&res)); },
+			}
 		}
+		res
 	}
 
 	// Returns whether we were a recursive lock (only relevant for read)
@@ -89,18 +156,28 @@ impl LockMetadata {
 			}
 			for locked in held.borrow().iter() {
 				if !read && *locked == *this {
-					panic!("Tried to lock a lock while it was held!");
+					// With `feature = "backtrace"` set, we may be looking at different instances
+					// of the same lock.
+					debug_assert!(cfg!(feature = "backtrace"), "Tried to acquire a lock while it was held!");
 				}
 				for locked_dep in locked.locked_before.lock().unwrap().iter() {
-					if *locked_dep == *this {
+					if locked_dep.lock == *this && locked_dep.lock != *locked {
 						#[cfg(feature = "backtrace")]
-						panic!("Tried to violate existing lockorder.\nMutex that should be locked after the current lock was created at the following backtrace.\nNote that to get a backtrace for the lockorder violation, you should set RUST_BACKTRACE=1\n{:?}", locked.lock_construction_bt);
+						panic!("Tried to violate existing lockorder.\nMutex that should be locked after the current lock was created at the following backtrace.\nNote that to get a backtrace for the lockorder violation, you should set RUST_BACKTRACE=1\nLock being taken constructed at: {} ({}):\n{:?}\nLock constructed at: {} ({})\n{:?}\n\nLock dep created at:\n{:?}\n\n",
+							get_construction_location(&this._lock_construction_bt), this.lock_idx, this._lock_construction_bt,
+							get_construction_location(&locked._lock_construction_bt), locked.lock_idx, locked._lock_construction_bt,
+							locked_dep.lockdep_trace);
 						#[cfg(not(feature = "backtrace"))]
 						panic!("Tried to violate existing lockorder. Build with the backtrace feature for more info.");
 					}
 				}
 				// Insert any already-held locks in our locked-before set.
-				this.locked_before.lock().unwrap().insert(Arc::clone(locked));
+				let mut locked_before = this.locked_before.lock().unwrap();
+				let mut lockdep = LockDep::new_without_bt(locked);
+				if !locked_before.contains(&lockdep) {
+					lockdep.lockdep_trace = Some(Backtrace::new());
+					locked_before.insert(lockdep);
+				}
 			}
 			held.borrow_mut().insert(Arc::clone(this));
 			inserted = true;
@@ -116,10 +193,15 @@ impl LockMetadata {
 			// Since a try-lock will simply fail if the lock is held already, we do not
 			// consider try-locks to ever generate lockorder inversions. However, if a try-lock
 			// succeeds, we do consider it to have created lockorder dependencies.
+			held.borrow_mut().insert(Arc::clone(this));
+			let mut locked_before = this.locked_before.lock().unwrap();
 			for locked in held.borrow().iter() {
-				this.locked_before.lock().unwrap().insert(Arc::clone(locked));
+				let mut lockdep = LockDep::new_without_bt(locked);
+				if !locked_before.contains(&lockdep) {
+					lockdep.lockdep_trace = Some(Backtrace::new());
+					locked_before.insert(lockdep);
+				}
 			}
-			held.borrow_mut().insert(Arc::clone(this));
 		});
 	}
 }
@@ -170,7 +252,7 @@ impl<T: Sized> DerefMut for MutexGuard<'_, T> {
 
 impl<T> Mutex<T> {
 	pub fn new(inner: T) -> Mutex<T> {
-		Mutex { inner: StdMutex::new(inner), deps: Arc::new(LockMetadata::new()) }
+		Mutex { inner: StdMutex::new(inner), deps: LockMetadata::new() }
 	}
 
 	pub fn lock<'a>(&'a self) -> LockResult<MutexGuard<'a, T>> {
@@ -249,7 +331,7 @@ impl<T: Sized> DerefMut for RwLockWriteGuard<'_, T> {
 
 impl<T> RwLock<T> {
 	pub fn new(inner: T) -> RwLock<T> {
-		RwLock { inner: StdRwLock::new(inner), deps: Arc::new(LockMetadata::new()) }
+		RwLock { inner: StdRwLock::new(inner), deps: LockMetadata::new() }
 	}
 
 	pub fn read<'a>(&'a self) -> LockResult<RwLockReadGuard<'a, T>> {
@@ -271,96 +353,101 @@ impl<T> RwLock<T> {
 	}
 }
 
-#[test]
-#[should_panic]
-fn recursive_lock_fail() {
-	let mutex = Mutex::new(());
-	let _a = mutex.lock().unwrap();
-	let _b = mutex.lock().unwrap();
-}
-
-#[test]
-fn recursive_read() {
-	let lock = RwLock::new(());
-	let _a = lock.read().unwrap();
-	let _b = lock.read().unwrap();
-}
+pub type FairRwLock<T> = RwLock<T>;
 
-#[test]
-#[should_panic]
-fn lockorder_fail() {
-	let a = Mutex::new(());
-	let b = Mutex::new(());
-	{
-		let _a = a.lock().unwrap();
-		let _b = b.lock().unwrap();
-	}
-	{
-		let _b = b.lock().unwrap();
-		let _a = a.lock().unwrap();
+mod tests {
+	use super::{RwLock, Mutex};
+
+	#[test]
+	#[should_panic]
+	#[cfg(not(feature = "backtrace"))]
+	fn recursive_lock_fail() {
+		let mutex = Mutex::new(());
+		let _a = mutex.lock().unwrap();
+		let _b = mutex.lock().unwrap();
+	}
+
+	#[test]
+	fn recursive_read() {
+		let lock = RwLock::new(());
+		let _a = lock.read().unwrap();
+		let _b = lock.read().unwrap();
+	}
+
+	#[test]
+	#[should_panic]
+	fn lockorder_fail() {
+		let a = Mutex::new(());
+		let b = Mutex::new(());
+		{
+			let _a = a.lock().unwrap();
+			let _b = b.lock().unwrap();
+		}
+		{
+			let _b = b.lock().unwrap();
+			let _a = a.lock().unwrap();
+		}
 	}
-}
 
-#[test]
-#[should_panic]
-fn write_lockorder_fail() {
-	let a = RwLock::new(());
-	let b = RwLock::new(());
-	{
-		let _a = a.write().unwrap();
-		let _b = b.write().unwrap();
-	}
-	{
-		let _b = b.write().unwrap();
-		let _a = a.write().unwrap();
+	#[test]
+	#[should_panic]
+	fn write_lockorder_fail() {
+		let a = RwLock::new(());
+		let b = RwLock::new(());
+		{
+			let _a = a.write().unwrap();
+			let _b = b.write().unwrap();
+		}
+		{
+			let _b = b.write().unwrap();
+			let _a = a.write().unwrap();
+		}
 	}
-}
 
-#[test]
-#[should_panic]
-fn read_lockorder_fail() {
-	let a = RwLock::new(());
-	let b = RwLock::new(());
-	{
-		let _a = a.read().unwrap();
-		let _b = b.read().unwrap();
-	}
-	{
-		let _b = b.read().unwrap();
-		let _a = a.read().unwrap();
+	#[test]
+	#[should_panic]
+	fn read_lockorder_fail() {
+		let a = RwLock::new(());
+		let b = RwLock::new(());
+		{
+			let _a = a.read().unwrap();
+			let _b = b.read().unwrap();
+		}
+		{
+			let _b = b.read().unwrap();
+			let _a = a.read().unwrap();
+		}
 	}
-}
 
-#[test]
-fn read_recurisve_no_lockorder() {
-	// Like the above, but note that no lockorder is implied when we recursively read-lock a
-	// RwLock, causing this to pass just fine.
-	let a = RwLock::new(());
-	let b = RwLock::new(());
-	let _outer = a.read().unwrap();
-	{
-		let _a = a.read().unwrap();
-		let _b = b.read().unwrap();
-	}
-	{
-		let _b = b.read().unwrap();
-		let _a = a.read().unwrap();
+	#[test]
+	fn read_recursive_no_lockorder() {
+		// Like the above, but note that no lockorder is implied when we recursively read-lock a
+		// RwLock, causing this to pass just fine.
+		let a = RwLock::new(());
+		let b = RwLock::new(());
+		let _outer = a.read().unwrap();
+		{
+			let _a = a.read().unwrap();
+			let _b = b.read().unwrap();
+		}
+		{
+			let _b = b.read().unwrap();
+			let _a = a.read().unwrap();
+		}
 	}
-}
 
-#[test]
-#[should_panic]
-fn read_write_lockorder_fail() {
-	let a = RwLock::new(());
-	let b = RwLock::new(());
-	{
-		let _a = a.write().unwrap();
-		let _b = b.read().unwrap();
-	}
-	{
-		let _b = b.read().unwrap();
-		let _a = a.write().unwrap();
+	#[test]
+	#[should_panic]
+	fn read_write_lockorder_fail() {
+		let a = RwLock::new(());
+		let b = RwLock::new(());
+		{
+			let _a = a.write().unwrap();
+			let _b = b.read().unwrap();
+		}
+		{
+			let _b = b.read().unwrap();
+			let _a = a.write().unwrap();
+		}
 	}
 }
-
-pub type FairRwLock<T> = RwLock<T>;
