Commit 157abfb

Wrap RwLock in peer_handler to avoid readers starving writers
Because we handle messages (which can take some time, persisting things to disk or validating cryptographic signatures) with the top-level read lock held, but require the top-level write lock to connect new peers or handle disconnection, we are particularly sensitive to writer starvation. Rust's libstd RwLock makes no fairness guarantees, exposing whatever behavior the OS provides as-is. On Linux, pthreads rwlocks default to preferring readers, allowing them to starve writers, and Rust's RwLock passes this through without any configurability. Here we work around that issue by blocking new readers while there are pending writers, optimizing for readable code over perfectly-optimized blocking.
1 parent 5fa690b commit 157abfb
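
To make the failure mode concrete, here is a minimal sketch (not part of the commit; the thread counts and sleep durations are invented for illustration). Several threads repeatedly take and hold libstd's RwLock read lock so their hold windows overlap; on Linux, where pthreads prefers readers, the writer below can then block indefinitely:

use std::sync::{Arc, RwLock};
use std::thread;
use std::time::Duration;

fn main() {
	let lock = Arc::new(RwLock::new(()));

	// Emulate message-processing threads: each holds the read lock for a
	// while and immediately re-acquires it, so the hold windows overlap and
	// the lock is rarely free of readers.
	for _ in 0..4 {
		let lock = Arc::clone(&lock);
		thread::spawn(move || loop {
			let _read_guard = lock.read().unwrap();
			thread::sleep(Duration::from_millis(10));
		});
	}

	thread::sleep(Duration::from_millis(50));

	// Emulate a new-connection handler needing the write lock. With
	// reader-preferring pthreads semantics (the Linux default), new readers
	// keep being admitted while this waits, so it can block indefinitely
	// and the line below may never print.
	let _write_guard = lock.write().unwrap();
	println!("writer acquired the lock");
}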

File tree

1 file changed: +51 -3 lines changed

lightning/src/ln/peer_handler.rs

@@ -32,7 +32,7 @@ use routing::network_graph::NetGraphMsgHandler;
 use prelude::*;
 use io;
 use alloc::collections::LinkedList;
-use sync::{Arc, Mutex, MutexGuard, RwLock};
+use sync::{Arc, Mutex, MutexGuard};
 use core::sync::atomic::{AtomicUsize, Ordering};
 use core::{cmp, hash, fmt, mem};
 use core::ops::Deref;
@@ -279,6 +279,54 @@ impl error::Error for PeerHandleError {
 	}
 }
 
+
+#[cfg(feature = "std")]
+mod rwlock {
+	use std::sync::{LockResult, RwLock, RwLockReadGuard, RwLockWriteGuard};
+	use std::sync::atomic::{AtomicUsize, Ordering};
+
+	/// Rust libstd's RwLock does not provide any fairness guarantees (and, in fact, when used on
+	/// Linux with pthreads under the hood, readers trivially and completely starve writers).
+	/// Because we often hold read locks while doing message processing in multiple threads which
+	/// can use significant CPU time, with write locks being time-sensitive but relatively small in
+	/// CPU time, we can end up with starvation completely blocking incoming connections or pings,
+	/// especially during initial graph sync.
+	///
+	/// Thus, we need to block readers when a writer is pending, which we do with a trivial RwLock
+	/// wrapper here. It's not particularly optimized, but provides some reasonable fairness by
+	/// blocking readers (by taking the write lock) if there are writers pending when we go to take
+	/// a read lock.
+	pub struct FairRwLock<T> {
+		lock: RwLock<T>,
+		waiting_writers: AtomicUsize,
+	}
+
+	impl<T> FairRwLock<T> {
+		pub fn new(t: T) -> Self {
+			Self { lock: RwLock::new(t), waiting_writers: AtomicUsize::new(0) }
+		}
+
+		pub fn write(&self) -> LockResult<RwLockWriteGuard<T>> {
+			self.waiting_writers.fetch_add(1, Ordering::AcqRel);
+			let res = self.lock.write();
+			self.waiting_writers.fetch_sub(1, Ordering::AcqRel);
+			res
+		}
+
+		pub fn read(&self) -> LockResult<RwLockReadGuard<T>> {
+			if self.waiting_writers.load(Ordering::Acquire) != 0 {
+				let _write_queue_lock = self.lock.write();
+			}
+			self.lock.read()
+		}
+	}
+}
+#[cfg(feature = "std")]
+use self::rwlock::*;
+
+#[cfg(not(feature = "std"))]
+type FairRwLock<T> = crate::sync::RwLock<T>;
+
 enum InitSyncTracker{
 	NoSyncRequested,
 	ChannelsSyncing(u64),
@@ -390,7 +438,7 @@ pub struct PeerManager<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: De
 		L::Target: Logger,
 		CMH::Target: CustomMessageHandler {
 	message_handler: MessageHandler<CM, RM>,
-	peers: RwLock<PeerHolder<Descriptor>>,
+	peers: FairRwLock<PeerHolder<Descriptor>>,
 	/// Only add to this set when noise completes.
 	/// Locked *after* peers. When an item is removed, it must be removed with the `peers` write
 	/// lock held. Entries may be added with only the `peers` read lock held (though the
@@ -494,7 +542,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> P
 
 		PeerManager {
 			message_handler,
-			peers: RwLock::new(PeerHolder {
+			peers: FairRwLock::new(PeerHolder {
 				peers: HashMap::new(),
 			}),
 			node_id_to_descriptor: Mutex::new(HashMap::new()),
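
As a quick check of the wrapper's behavior, a sketch like the following (again not part of the commit; main and the timings are invented, and FairRwLock is copied from the diff above with doc comments elided so the example stands alone) shows the writer getting through the same reader churn, because read() queues behind any pending writer once waiting_writers is non-zero:

use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, LockResult, RwLock, RwLockReadGuard, RwLockWriteGuard};
use std::thread;
use std::time::{Duration, Instant};

// Copied from the diff above so this demo compiles standalone.
pub struct FairRwLock<T> {
	lock: RwLock<T>,
	waiting_writers: AtomicUsize,
}

impl<T> FairRwLock<T> {
	pub fn new(t: T) -> Self {
		Self { lock: RwLock::new(t), waiting_writers: AtomicUsize::new(0) }
	}

	pub fn write(&self) -> LockResult<RwLockWriteGuard<T>> {
		self.waiting_writers.fetch_add(1, Ordering::AcqRel);
		let res = self.lock.write();
		self.waiting_writers.fetch_sub(1, Ordering::AcqRel);
		res
	}

	pub fn read(&self) -> LockResult<RwLockReadGuard<T>> {
		if self.waiting_writers.load(Ordering::Acquire) != 0 {
			// Queue behind any pending writer by taking and immediately
			// dropping the write lock before taking the read lock.
			let _write_queue_lock = self.lock.write();
		}
		self.lock.read()
	}
}

fn main() {
	let lock = Arc::new(FairRwLock::new(0u64));

	// The same churn of overlapping readers as in the earlier sketch.
	for _ in 0..4 {
		let lock = Arc::clone(&lock);
		thread::spawn(move || loop {
			let _read_guard = lock.read().unwrap();
			thread::sleep(Duration::from_millis(10));
		});
	}

	thread::sleep(Duration::from_millis(50));

	// Once this write() bumps waiting_writers, new read() calls queue behind
	// it, so it only waits out the read guards that are already held.
	let start = Instant::now();
	*lock.write().unwrap() += 1;
	println!("writer acquired after {:?}", start.elapsed());
}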
