Skip to content

Commit bc08b44

Browse files
committed
lockref: implement lockless reference count updates using cmpxchg()
Instead of taking the spinlock, the lockless versions atomically check that the lock is not taken, and do the reference count update using a cmpxchg() loop. This is semantically identical to doing the reference count update protected by the lock, but avoids the "wait for lock" contention that you get when accesses to the reference count are contended.

Note that a "lockref" is absolutely _not_ equivalent to an atomic_t. Even when the lockref reference counts are updated atomically with cmpxchg, the fact that they also verify the state of the spinlock means that the lockless updates can never happen while somebody else holds the spinlock.

So while "lockref_put_or_lock()" looks a lot like just another name for "atomic_dec_and_lock()", and both optimize to lockless updates, they are fundamentally different: the decrement done by atomic_dec_and_lock() is truly independent of any lock (as long as it doesn't decrement to zero), so a locked region can still see the count change.

The lockref structure, in contrast, really is a *locked* reference count. If you hold the spinlock, the reference count will be stable and you can modify the reference count without using atomics, because even the lockless updates will see and respect the state of the lock.

In order to enable the cmpxchg lockless code, the architecture needs to do three things:

 (1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit in an aligned u64, and have a "cmpxchg()" implementation that works on such a u64 data type.

 (2) define a helper function to test for a spinlock being unlocked ("arch_spin_value_unlocked()")

 (3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its Kconfig file.

This enables it for x86-64 (but not 32-bit, we'd need to make sure cmpxchg() turns into the proper cmpxchg8b in order to enable it for 32-bit mode).

Signed-off-by: Linus Torvalds <[email protected]>
1 parent 2f4f12e commit bc08b44

File tree

5 files changed

+84
-3
lines changed

5 files changed

+84
-3
lines changed

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ config X86_64
1616
def_bool y
1717
depends on 64BIT
1818
select X86_DEV_DMA_OPS
19+
select ARCH_USE_CMPXCHG_LOCKREF
1920

2021
### Arch settings
2122
config X86

arch/x86/include/asm/spinlock.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@
3434
# define UNLOCK_LOCK_PREFIX
3535
#endif
3636

37+
/*
 * Test a spinlock *value* (e.g. one read out of a lockref word) for the
 * unlocked state, without touching the lock in memory.  An x86 ticket
 * lock is free exactly when the tail (next ticket to hand out) has
 * caught up with the head (ticket currently being served).
 */
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
	return lock.tickets.tail == lock.tickets.head;
}
41+
3742
/*
3843
* Ticket locks are conceptually two parts, one indicating the current head of
3944
* the queue, and the other indicating the current tail. The lock is acquired

include/linux/lockref.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,15 @@
1717
#include <linux/spinlock.h>
1818

1919
struct lockref {
20-
spinlock_t lock;
21-
unsigned int count;
20+
union {
21+
#ifdef CONFIG_CMPXCHG_LOCKREF
22+
aligned_u64 lock_count;
23+
#endif
24+
struct {
25+
spinlock_t lock;
26+
unsigned int count;
27+
};
28+
};
2229
};
2330

2431
extern void lockref_get(struct lockref *);

lib/Kconfig

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@ config STMP_DEVICE
4848
config PERCPU_RWSEM
4949
boolean
5050

51+
config ARCH_USE_CMPXCHG_LOCKREF
52+
bool
53+
54+
config CMPXCHG_LOCKREF
55+
def_bool y if ARCH_USE_CMPXCHG_LOCKREF
56+
depends on SMP
57+
depends on !GENERIC_LOCKBREAK
58+
depends on !DEBUG_SPINLOCK
59+
depends on !DEBUG_LOCK_ALLOC
60+
5161
config CRC_CCITT
5262
tristate "CRC-CCITT functions"
5363
help

lib/lockref.c

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,33 @@
11
#include <linux/export.h>
22
#include <linux/lockref.h>
33

4+
#ifdef CONFIG_CMPXCHG_LOCKREF

/*
 * Lockless update loop: while the spinlock embedded in the lockref word
 * reads as unlocked, speculatively apply CODE to a private copy ("new",
 * with "old" holding the value we loaded) and try to commit it with a
 * single cmpxchg() on the combined lock+count word.
 *
 * On a cmpxchg() hit, SUCCESS runs.  On a miss, cmpxchg() has returned
 * the current memory contents into "old", so the next iteration retries
 * with freshly reloaded data.  If the lock is (or becomes) held, the
 * loop exits and the caller falls back to its spinlock path.
 */
#define CMPXCHG_LOOP(CODE, SUCCESS) do {					\
	struct lockref old;							\
	BUILD_BUG_ON(sizeof(old) != 8);						\
	old.lock_count = ACCESS_ONCE(lockref->lock_count);			\
	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {	\
		struct lockref expected = old;					\
		struct lockref new = old;					\
		CODE								\
		old.lock_count = cmpxchg(&lockref->lock_count,			\
					 expected.lock_count,			\
					 new.lock_count);			\
		if (likely(old.lock_count == expected.lock_count)) {		\
			SUCCESS;						\
		}								\
	}									\
} while (0)

#else

/* No usable cmpxchg(): callers go straight to their spinlock path. */
#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0)

#endif
30+
431
/**
532
* lockref_get - Increments reference count unconditionally
633
* @lockcnt: pointer to lockref structure
@@ -10,6 +37,12 @@
1037
*/
1138
void lockref_get(struct lockref *lockref)
1239
{
40+
CMPXCHG_LOOP(
41+
new.count++;
42+
,
43+
return;
44+
);
45+
1346
spin_lock(&lockref->lock);
1447
lockref->count++;
1548
spin_unlock(&lockref->lock);
@@ -23,9 +56,18 @@ EXPORT_SYMBOL(lockref_get);
2356
*/
2457
int lockref_get_not_zero(struct lockref *lockref)
2558
{
26-
int retval = 0;
59+
int retval;
60+
61+
CMPXCHG_LOOP(
62+
new.count++;
63+
if (!old.count)
64+
return 0;
65+
,
66+
return 1;
67+
);
2768

2869
spin_lock(&lockref->lock);
70+
retval = 0;
2971
if (lockref->count) {
3072
lockref->count++;
3173
retval = 1;
@@ -43,6 +85,14 @@ EXPORT_SYMBOL(lockref_get_not_zero);
4385
*/
4486
int lockref_get_or_lock(struct lockref *lockref)
4587
{
88+
CMPXCHG_LOOP(
89+
new.count++;
90+
if (!old.count)
91+
break;
92+
,
93+
return 1;
94+
);
95+
4696
spin_lock(&lockref->lock);
4797
if (!lockref->count)
4898
return 0;
@@ -59,6 +109,14 @@ EXPORT_SYMBOL(lockref_get_or_lock);
59109
*/
60110
int lockref_put_or_lock(struct lockref *lockref)
61111
{
112+
CMPXCHG_LOOP(
113+
new.count--;
114+
if (old.count <= 1)
115+
break;
116+
,
117+
return 1;
118+
);
119+
62120
spin_lock(&lockref->lock);
63121
if (lockref->count <= 1)
64122
return 0;

0 commit comments

Comments
 (0)