Skip to content

Commit bacc578

Browse files
committed
Create a simple FairRwLock to avoid readers starving writers
Because we handle messages (which can take some time, persisting things to disk or validating cryptographic signatures) with the top-level read lock, but require the top-level write lock to connect new peers or handle disconnection, we are particularly sensitive to writer starvation issues. Rust's libstd RwLock does not provide any fairness guarantees, using whatever the OS provides as-is. On Linux, pthreads defaults to starving writers, which Rust's RwLock exposes to us (without any configurability). Here we work around that issue by blocking readers if there are pending writers, optimizing for readable code over perfectly-optimized blocking.
1 parent 97435c3 commit bacc578

File tree

3 files changed

+57
-3
lines changed

3 files changed

+57
-3
lines changed

lightning/src/ln/peer_handler.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,15 @@ use ln::peer_channel_encryptor::{PeerChannelEncryptor,NextNoiseStep};
2626
use ln::wire;
2727
use ln::wire::Encode;
2828
use util::atomic_counter::AtomicCounter;
29+
use util::fairrwlock::FairRwLock;
2930
use util::events::{MessageSendEvent, MessageSendEventsProvider};
3031
use util::logger::Logger;
3132
use routing::network_graph::{NetworkGraph, NetGraphMsgHandler};
3233

3334
use prelude::*;
3435
use io;
3536
use alloc::collections::LinkedList;
36-
use sync::{Arc, Mutex, MutexGuard, RwLock};
37+
use sync::{Arc, Mutex, MutexGuard};
3738
use core::sync::atomic::{AtomicUsize, Ordering};
3839
use core::{cmp, hash, fmt, mem};
3940
use core::ops::Deref;
@@ -413,7 +414,7 @@ pub struct PeerManager<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: De
413414
L::Target: Logger,
414415
CMH::Target: CustomMessageHandler {
415416
message_handler: MessageHandler<CM, RM>,
416-
peers: RwLock<PeerHolder<Descriptor>>,
417+
peers: FairRwLock<PeerHolder<Descriptor>>,
417418
/// Only add to this set when noise completes.
418419
/// Locked *after* peers. When an item is removed, it must be removed with the `peers` write
419420
/// lock held. Entries may be added with only the `peers` read lock held (though the
@@ -525,7 +526,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> P
525526

526527
PeerManager {
527528
message_handler,
528-
peers: RwLock::new(PeerHolder {
529+
peers: FairRwLock::new(PeerHolder {
529530
peers: HashMap::new(),
530531
}),
531532
node_id_to_descriptor: Mutex::new(HashMap::new()),

lightning/src/util/fairrwlock.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#[cfg(feature = "std")]
2+
mod rwlock {
3+
use std::sync::{TryLockResult, LockResult, RwLock, RwLockReadGuard, RwLockWriteGuard};
4+
use std::sync::atomic::{AtomicUsize, Ordering};
5+
6+
/// Rust libstd's RwLock does not provide any fairness guarantees (and, in fact, when used on
7+
/// Linux with pthreads under the hood, readers trivially and completely starve writers).
8+
/// Because we often hold read locks while doing message processing in multiple threads which
9+
/// can use significant CPU time, with write locks being time-sensitive but relatively small in
10+
/// CPU time, we can end up with starvation completely blocking incoming connections or pings,
11+
/// especially during initial graph sync.
12+
///
13+
/// Thus, we need to block readers when a writer is pending, which we do with a trivial RwLock
14+
/// wrapper here. Its not particularly optimized, but provides some reasonable fairness by
15+
/// blocking readers (by taking the write lock) if there are writers pending when we go to take
16+
/// a read lock.
17+
pub struct FairRwLock<T> {
18+
lock: RwLock<T>,
19+
waiting_writers: AtomicUsize,
20+
}
21+
22+
impl<T> FairRwLock<T> {
23+
pub fn new(t: T) -> Self {
24+
Self { lock: RwLock::new(t), waiting_writers: AtomicUsize::new(0) }
25+
}
26+
27+
pub fn write(&self) -> LockResult<RwLockWriteGuard<T>> {
28+
self.waiting_writers.fetch_add(1, Ordering::AcqRel);
29+
let res = self.lock.write();
30+
self.waiting_writers.fetch_sub(1, Ordering::AcqRel);
31+
res
32+
}
33+
34+
pub fn try_write(&self) -> TryLockResult<RwLockWriteGuard<T>> {
35+
self.lock.try_write()
36+
}
37+
38+
pub fn read(&self) -> LockResult<RwLockReadGuard<T>> {
39+
if self.waiting_writers.load(Ordering::Acquire) != 0 {
40+
let _write_queue_lock = self.lock.write();
41+
}
42+
self.lock.read()
43+
}
44+
}
45+
}
46+
#[cfg(feature = "std")]
47+
pub use self::rwlock::*;
48+
49+
#[cfg(not(feature = "std"))]
50+
pub type FairRwLock<T> = crate::sync::RwLock<T>;
51+
52+

lightning/src/util/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ pub mod message_signing;
2323
pub(crate) mod atomic_counter;
2424
pub(crate) mod byte_utils;
2525
pub(crate) mod chacha20;
26+
pub(crate) mod fairrwlock;
2627
#[cfg(feature = "fuzztarget")]
2728
pub mod zbase32;
2829
#[cfg(not(feature = "fuzztarget"))]

0 commit comments

Comments
 (0)