Skip to content

Commit 2166c8a

Browse files
committed
Ignore lockorder violation on same callsite with different construction
As long as the lock order on such locks is still valid, we should allow them regardless of whether they were constructed at the same location or not. Note that we can only really enforce this if we require one lock call per line, or if we have access to symbol columns (as we do on Linux and macOS). We opt for a smaller patch by relying on the latter. This was previously triggered by some recent test changes to `test_manager_serialize_deserialize_inconsistent_monitor`. When the test ends and a node is dropped, causing us to persist each of them, we'd detect a possible lockorder-violation deadlock across three different `Mutex` instances that are held at the same location when serializing our `per_peer_states` in `ChannelManager::write`. The presumed lockorder violation happens because the first `Mutex` held shares the same construction location with the third one, while the second `Mutex` has a different construction location. When we hold the second one, we consider the first as a dependency, and then consider the second as a dependency when holding the third, causing a circular dependency (since the third shares the same construction location as the first). However, as the inline comment suggests, this isn't a lockorder violation that could actually result in a deadlock, since we are under a dependent write lock which no one else can have access to.
1 parent 04ee948 commit 2166c8a

File tree

1 file changed

+46
-29
lines changed

1 file changed

+46
-29
lines changed

lightning/src/sync/debug_sync.rs

+46-29
Original file line number | Diff line number | Diff line change
@@ -74,24 +74,31 @@ struct LockDep {
7474
_lockdep_trace: Backtrace,
7575
}
7676

77+
// Locates the frame preceding the earliest `debug_sync` frame in the call stack. This ensures we
78+
// can properly detect a lock's construction and acquiral callsites, since the latter may contain
79+
// multiple `debug_sync` frames.
7780
#[cfg(feature = "backtrace")]
78-
fn get_construction_location(backtrace: &Backtrace) -> (String, Option<u32>) {
79-
// Find the first frame that is after `debug_sync` (or that is in our tests) and use
80-
// that as the mutex construction site. Note that the first few frames may be in
81-
// the `backtrace` crate, so we have to ignore those.
81+
fn locate_call_symbol(backtrace: &Backtrace) -> (String, Option<u32>) {
82+
// Find the earliest `debug_sync` frame (or that is in our tests) and use the frame preceding it
83+
// as the callsite. Note that the first few frames may be in the `backtrace` crate, so we have
84+
// to ignore those.
8285
let sync_mutex_constr_regex = regex::Regex::new(r"lightning.*debug_sync").unwrap();
8386
let mut found_debug_sync = false;
84-
for frame in backtrace.frames() {
85-
for symbol in frame.symbols() {
86-
let symbol_name = symbol.name().unwrap().as_str().unwrap();
87-
if !sync_mutex_constr_regex.is_match(symbol_name) {
88-
if found_debug_sync {
89-
return (format!("{}:{}", symbol.filename().unwrap().display(), symbol.lineno().unwrap()), symbol.colno());
90-
}
91-
} else { found_debug_sync = true; }
87+
let mut symbol_after_latest_debug_sync = None;
88+
for frame in backtrace.frames().iter() {
89+
for symbol in frame.symbols().iter() {
90+
if let Some(symbol_name) = symbol.name().map(|name| name.as_str()).flatten() {
91+
if !sync_mutex_constr_regex.is_match(symbol_name) {
92+
if found_debug_sync {
93+
symbol_after_latest_debug_sync = Some(symbol);
94+
found_debug_sync = false;
95+
}
96+
} else { found_debug_sync = true; }
97+
}
9298
}
9399
}
94-
panic!("Couldn't find mutex construction callsite");
100+
let symbol = symbol_after_latest_debug_sync.expect("Couldn't find lock call symbol");
101+
(format!("{}:{}", symbol.filename().unwrap().display(), symbol.lineno().unwrap()), symbol.colno())
95102
}
96103

97104
impl LockMetadata {
@@ -108,13 +115,13 @@ impl LockMetadata {
108115
#[cfg(feature = "backtrace")]
109116
{
110117
let (lock_constr_location, lock_constr_colno) =
111-
get_construction_location(&res._lock_construction_bt);
118+
locate_call_symbol(&res._lock_construction_bt);
112119
LOCKS_INIT.call_once(|| { unsafe { LOCKS = Some(StdMutex::new(HashMap::new())); } });
113120
let mut locks = unsafe { LOCKS.as_ref() }.unwrap().lock().unwrap();
114121
match locks.entry(lock_constr_location) {
115122
hash_map::Entry::Occupied(e) => {
116123
assert_eq!(lock_constr_colno,
117-
get_construction_location(&e.get()._lock_construction_bt).1,
124+
locate_call_symbol(&e.get()._lock_construction_bt).1,
118125
"Because Windows doesn't support column number results in backtraces, we cannot construct two mutexes on the same line or we risk lockorder detection false positives.");
119126
return Arc::clone(e.get())
120127
},
@@ -126,9 +133,8 @@ impl LockMetadata {
126133

127134
fn pre_lock(this: &Arc<LockMetadata>, _double_lock_self_allowed: bool) {
128135
LOCKS_HELD.with(|held| {
129-
// For each lock which is currently locked, check that no lock's locked-before
130-
// set includes the lock we're about to lock, which would imply a lockorder
131-
// inversion.
136+
// For each lock that is currently held, check that no lock's `locked_before` set
137+
// includes the lock we're about to hold, which would imply a lockorder inversion.
132138
for (locked_idx, _locked) in held.borrow().iter() {
133139
if *locked_idx == this.lock_idx {
134140
// Note that with `feature = "backtrace"` set, we may be looking at different
@@ -138,23 +144,34 @@ impl LockMetadata {
138144
#[cfg(feature = "backtrace")]
139145
debug_assert!(_double_lock_self_allowed,
140146
"Tried to acquire a lock while it was held!\nLock constructed at {}",
141-
get_construction_location(&this._lock_construction_bt).0);
147+
locate_call_symbol(&this._lock_construction_bt).0);
142148
#[cfg(not(feature = "backtrace"))]
143149
panic!("Tried to acquire a lock while it was held!");
144150
}
145151
}
146152
for (_locked_idx, locked) in held.borrow().iter() {
147153
for (locked_dep_idx, _locked_dep) in locked.locked_before.lock().unwrap().iter() {
148-
if *locked_dep_idx == this.lock_idx && *locked_dep_idx != locked.lock_idx {
149-
#[cfg(feature = "backtrace")]
150-
panic!("Tried to violate existing lockorder.\nMutex that should be locked after the current lock was created at the following backtrace.\nNote that to get a backtrace for the lockorder violation, you should set RUST_BACKTRACE=1\nLock being taken constructed at: {} ({}):\n{:?}\nLock constructed at: {} ({})\n{:?}\n\nLock dep created at:\n{:?}\n\n",
151-
get_construction_location(&this._lock_construction_bt).0,
152-
this.lock_idx, this._lock_construction_bt,
153-
get_construction_location(&locked._lock_construction_bt).0,
154-
locked.lock_idx, locked._lock_construction_bt,
155-
_locked_dep._lockdep_trace);
156-
#[cfg(not(feature = "backtrace"))]
157-
panic!("Tried to violate existing lockorder. Build with the backtrace feature for more info.");
154+
let is_dep_this_lock = *locked_dep_idx == this.lock_idx;
155+
let has_same_construction = *locked_dep_idx == locked.lock_idx;
156+
if is_dep_this_lock && !has_same_construction {
157+
#[allow(unused_mut, unused_assignments)]
158+
let mut has_same_callsite = false;
159+
#[cfg(feature = "backtrace")] {
160+
has_same_callsite = _double_lock_self_allowed &&
161+
locate_call_symbol(&_locked_dep._lockdep_trace) ==
162+
locate_call_symbol(&Backtrace::new());
163+
}
164+
if !has_same_callsite {
165+
#[cfg(feature = "backtrace")]
166+
panic!("Tried to violate existing lockorder.\nMutex that should be locked after the current lock was created at the following backtrace.\nNote that to get a backtrace for the lockorder violation, you should set RUST_BACKTRACE=1\nLock being taken constructed at: {} ({}):\n{:?}\nLock constructed at: {} ({})\n{:?}\n\nLock dep created at:\n{:?}\n\n",
167+
locate_call_symbol(&this._lock_construction_bt).0,
168+
this.lock_idx, this._lock_construction_bt,
169+
locate_call_symbol(&locked._lock_construction_bt).0,
170+
locked.lock_idx, locked._lock_construction_bt,
171+
_locked_dep._lockdep_trace);
172+
#[cfg(not(feature = "backtrace"))]
173+
panic!("Tried to violate existing lockorder. Build with the backtrace feature for more info.");
174+
}
158175
}
159176
}
160177
// Insert any already-held locks in our locked-before set.

0 commit comments

Comments
 (0)