Skip to content

Commit b3862c3

Browse files
SchrodingerZhuyuxuanchen1997
authored and committed
[libc] add a simple TTAS spin lock (#98846)
1 parent 1bfe3e7 commit b3862c3

File tree

2 files changed

+90
-0
lines changed

2 files changed

+90
-0
lines changed

libc/src/__support/threads/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,15 @@ add_header_library(
1010
sleep.h
1111
)
1212

13+
# Header-only TTAS spin lock. The DEPENDS list mirrors the headers that
# spin_lock.h includes directly: the atomic wrapper, the sleep helper, and the
# attribute / architecture-detection macro headers.
add_header_library(
  spin_lock
  HDRS
    spin_lock.h
  DEPENDS
    .sleep
    libc.src.__support.CPP.atomic
    libc.src.__support.macros.attributes
    libc.src.__support.macros.properties.architectures
)
21+
1322
# Descend into the subdirectory named after the target OS, but only when such a
# directory exists next to this CMakeLists.txt.
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
1423
add_subdirectory(${LIBC_TARGET_OS})
1524
endif()
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
//===-- TTAS Spin Lock ----------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_SPIN_LOCK_H
10+
#define LLVM_LIBC_SRC___SUPPORT_THREADS_SPIN_LOCK_H
11+
12+
#include "src/__support/CPP/atomic.h"
13+
#include "src/__support/macros/attributes.h"
14+
#include "src/__support/macros/properties/architectures.h"
15+
#include "src/__support/threads/sleep.h"
16+
17+
namespace LIBC_NAMESPACE_DECL {
18+
19+
namespace spinlock {
20+
template <typename LockWord, typename Return>
21+
using AtomicOp = Return (cpp::Atomic<LockWord>::*)(LockWord, cpp::MemoryOrder,
22+
cpp::MemoryScope);
23+
}
24+
25+
template <typename LockWord, spinlock::AtomicOp<LockWord, LockWord> Acquire,
26+
spinlock::AtomicOp<LockWord, void> Release>
27+
class SpinLockAdaptor {
28+
cpp::Atomic<LockWord> flag;
29+
30+
public:
31+
LIBC_INLINE constexpr SpinLockAdaptor() : flag{false} {}
32+
LIBC_INLINE bool try_lock() {
33+
return !flag.*Acquire(static_cast<LockWord>(1), cpp::MemoryOrder::ACQUIRE);
34+
}
35+
LIBC_INLINE void lock() {
36+
// clang-format off
37+
// For normal TTAS, this compiles to the following on armv9a and x86_64:
38+
// mov w8, #1 | .LBB0_1:
39+
// .LBB0_1: | mov al, 1
40+
// swpab w8, w9, [x0] | xchg byte ptr [rdi], al
41+
// tbnz w9, #0, .LBB0_3 | test al, 1
42+
// b .LBB0_4 | jne .LBB0_3
43+
// .LBB0_2: | jmp .LBB0_4
44+
// isb | .LBB0_2:
45+
// .LBB0_3: | pause
46+
// ldrb w9, [x0] | .LBB0_3:
47+
// tbnz w9, #0, .LBB0_2 | movzx eax, byte ptr [rdi]
48+
// b .LBB0_1 | test al, 1
49+
// .LBB0_4: | jne .LBB0_2
50+
// ret | jmp .LBB0_1
51+
// | .LBB0_4:
52+
// | ret
53+
// clang-format on
54+
// Notice that inside the busy loop .LBB0_2 and .LBB0_3, only instructions
55+
// with load semantics are used. swpab/xchg is only issued in outer loop
56+
// .LBB0_1. This is useful to avoid extra write traffic. The cache
57+
// coherence guarantees "write propagation", so even if the inner loop only
58+
// reads with relaxed ordering, the thread will evetually see the write.
59+
while (!try_lock())
60+
while (flag.load(cpp::MemoryOrder::RELAXED))
61+
sleep_briefly();
62+
}
63+
LIBC_INLINE void unlock() {
64+
flag.*Release(static_cast<LockWord>(0), cpp::MemoryOrder::RELEASE);
65+
}
66+
};
67+
68+
// It is reported that atomic operations with higher-order semantics
69+
// lead to better performance on GPUs.
70+
#ifdef LIBC_TARGET_ARCH_IS_GPU
71+
using SpinLock =
72+
SpinLockAdaptor<unsigned int, &cpp::Atomic<unsigned int>::fetch_or,
73+
&cpp::Atomic<unsigned int>::fetch_and>;
74+
#else
75+
using SpinLock = SpinLockAdaptor<bool, &cpp::Atomic<bool>::exchange,
76+
&cpp::Atomic<bool>::store>;
77+
#endif
78+
79+
} // namespace LIBC_NAMESPACE_DECL
80+
81+
#endif // LLVM_LIBC_SRC___SUPPORT_THREADS_SPIN_LOCK_H

0 commit comments

Comments
 (0)