Skip to content

Commit c566769

Browse files
yonghong-songYonghong Song
and
Yonghong Song
authored
BPF: Ensure __sync_fetch_and_add() always generate atomic_fetch_add insn (llvm#101428)
Peilen Ye reported an issue ([1]) where for __sync_fetch_and_add(...) without return value like __sync_fetch_and_add(&foo, 1); llvm BPF backend generates locked insn e.g. lock *(u32 *)(r1 + 0) += r2 If __sync_fetch_and_add(...) returns a value like res = __sync_fetch_and_add(&foo, 1); llvm BPF backend generates like r2 = atomic_fetch_add((u32 *)(r1 + 0), r2) The above generation of 'lock *(u32 *)(r1 + 0) += r2' caused a problem in jit since proper barrier is not inserted. The above discrepancy is due to commit [2] where it tries to maintain backward compatability since before commit [2], __sync_fetch_and_add(...) generates lock insn in BPF backend. Based on discussion in [1], now it is time to fix the above discrepancy so we can have proper barrier support in jit. This patch made sure that __sync_fetch_and_add(...) always generates atomic_fetch_add(...) insns. Now 'lock *(u32 *)(r1 + 0) += r2' can only be generated by inline asm. I also removed the whole BPFMIChecking.cpp file whose original purpose is to detect and issue errors if XADD{W,D,W32} may return a value used subsequently. Since insns XADD{W,D,W32} are all inline asm only now, such error detection is not needed. [1] https://lore.kernel.org/bpf/[email protected]/T/#mb68d67bc8f39e35a0c3db52468b9de59b79f021f [2] llvm@286daaf Co-authored-by: Yonghong Song <[email protected]>
1 parent 5f7e921 commit c566769

File tree

10 files changed

+28
-344
lines changed

10 files changed

+28
-344
lines changed

llvm/lib/Target/BPF/BPF.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
2828
FunctionPass *createBPFMISimplifyPatchablePass();
2929
FunctionPass *createBPFMIPeepholePass();
3030
FunctionPass *createBPFMIPreEmitPeepholePass();
31-
FunctionPass *createBPFMIPreEmitCheckingPass();
3231

3332
InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &,
3433
const BPFSubtarget &,
@@ -37,7 +36,6 @@ InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &,
3736
void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
3837
void initializeBPFDAGToDAGISelLegacyPass(PassRegistry &);
3938
void initializeBPFMIPeepholePass(PassRegistry &);
40-
void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
4139
void initializeBPFMIPreEmitPeepholePass(PassRegistry &);
4240
void initializeBPFMISimplifyPatchablePass(PassRegistry &);
4341

llvm/lib/Target/BPF/BPFInstrInfo.td

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -789,12 +789,12 @@ let Predicates = [BPFNoALU32] in {
789789
}
790790

791791
// Atomic XADD for BPFNoALU32
792-
class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
792+
class XADD<BPFWidthModifer SizeOp, string OpcodeStr>
793793
: TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
794794
(outs GPR:$dst),
795795
(ins MEMri:$addr, GPR:$val),
796796
"lock *("#OpcodeStr#" *)($addr) += $val",
797-
[(set GPR:$dst, (OpNode ADDRri:$addr, GPR:$val))]> {
797+
[]> {
798798
bits<4> dst;
799799
bits<20> addr;
800800

@@ -805,12 +805,6 @@ class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
805805
let BPFClass = BPF_STX;
806806
}
807807

808-
let Constraints = "$dst = $val" in {
809-
let Predicates = [BPFNoALU32] in {
810-
def XADDW : XADD<BPF_W, "u32", atomic_load_add_i32>;
811-
}
812-
}
813-
814808
// Atomic add, and, or, xor
815809
class ATOMIC_NOFETCH<BPFArithOp Opc, string Opstr>
816810
: TYPE_LD_ST<BPF_ATOMIC.Value, BPF_DW.Value,
@@ -895,6 +889,13 @@ class XFALU32<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr,
895889
let BPFClass = BPF_STX;
896890
}
897891

892+
let Constraints = "$dst = $val" in {
893+
let Predicates = [BPFNoALU32] in {
894+
def XADDW : XADD<BPF_W, "u32">;
895+
def XFADDW : XFALU64<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_i32>;
896+
}
897+
}
898+
898899
let Constraints = "$dst = $val" in {
899900
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
900901
def XFADDW32 : XFALU32<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_i32>;

llvm/lib/Target/BPF/BPFMIChecking.cpp

Lines changed: 0 additions & 251 deletions
This file was deleted.

llvm/lib/Target/BPF/BPFTargetMachine.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,6 @@ void BPFPassConfig::addMachineSSAOptimization() {
178178
}
179179

180180
void BPFPassConfig::addPreEmitPass() {
181-
addPass(createBPFMIPreEmitCheckingPass());
182181
if (getOptLevel() != CodeGenOptLevel::None)
183182
if (!DisableMIPeephole)
184183
addPass(createBPFMIPreEmitPeepholePass());

llvm/lib/Target/BPF/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ add_llvm_target(BPFCodeGen
3939
BPFSubtarget.cpp
4040
BPFTargetMachine.cpp
4141
BPFMIPeephole.cpp
42-
BPFMIChecking.cpp
4342
BPFMISimplifyPatchable.cpp
4443
BTFDebug.cpp
4544

llvm/test/CodeGen/BPF/atomics.ll

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,19 @@
1-
; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck %s
2-
; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefix=CHECK-V3 %s
1+
; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck --check-prefixes=CHECK,CHECK-V2 %s
2+
; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefixes=CHECK,CHECK-V3 %s
33

44
; CHECK-LABEL: test_load_add_32
5-
; CHECK: lock *(u32 *)(r1 + 0) += r2
6-
; CHECK: encoding: [0xc3,0x21
7-
; CHECK-V3: lock *(u32 *)(r1 + 0) += w2
8-
; CHECK-V3: encoding: [0xc3,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
5+
; CHECK-V2: r2 = atomic_fetch_add((u32 *)(r1 + 0), r2)
6+
; CHECK-V3: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
7+
; CHECK: encoding: [0xc3,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
98
define void @test_load_add_32(ptr %p, i32 zeroext %v) {
109
entry:
1110
atomicrmw add ptr %p, i32 %v seq_cst
1211
ret void
1312
}
1413

1514
; CHECK-LABEL: test_load_add_64
16-
; CHECK: lock *(u64 *)(r1 + 0) += r2
17-
; CHECK: encoding: [0xdb,0x21
18-
; CHECK-V3: lock *(u64 *)(r1 + 0) += r2
19-
; CHECK-V3: encoding: [0xdb,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
15+
; CHECK: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
16+
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
2017
define void @test_load_add_64(ptr %p, i64 zeroext %v) {
2118
entry:
2219
atomicrmw add ptr %p, i64 %v seq_cst

llvm/test/CodeGen/BPF/atomics_2.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,8 @@ entry:
214214
}
215215

216216
; CHECK-LABEL: test_atomic_xor_32
217-
; CHECK: lock *(u32 *)(r1 + 0) ^= w2
218-
; CHECK: encoding: [0xc3,0x21,0x00,0x00,0xa0,0x00,0x00,0x00]
217+
; CHECK: w2 = atomic_fetch_xor((u32 *)(r1 + 0), w2)
218+
; CHECK: encoding: [0xc3,0x21,0x00,0x00,0xa1,0x00,0x00,0x00]
219219
; CHECK: w0 = 0
220220
define dso_local i32 @test_atomic_xor_32(ptr nocapture %p, i32 %v) local_unnamed_addr {
221221
entry:
@@ -224,8 +224,8 @@ entry:
224224
}
225225

226226
; CHECK-LABEL: test_atomic_xor_64
227-
; CHECK: lock *(u64 *)(r1 + 0) ^= r2
228-
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa0,0x00,0x00,0x00]
227+
; CHECK: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
228+
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa1,0x00,0x00,0x00]
229229
; CHECK: w0 = 0
230230
define dso_local i32 @test_atomic_xor_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
231231
entry:
@@ -234,8 +234,8 @@ entry:
234234
}
235235

236236
; CHECK-LABEL: test_atomic_and_64
237-
; CHECK: lock *(u64 *)(r1 + 0) &= r2
238-
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x50,0x00,0x00,0x00]
237+
; CHECK: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2)
238+
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x51,0x00,0x00,0x00]
239239
; CHECK: w0 = 0
240240
define dso_local i32 @test_atomic_and_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
241241
entry:
@@ -244,8 +244,8 @@ entry:
244244
}
245245

246246
; CHECK-LABEL: test_atomic_or_64
247-
; CHECK: lock *(u64 *)(r1 + 0) |= r2
248-
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x40,0x00,0x00,0x00]
247+
; CHECK: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2)
248+
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x41,0x00,0x00,0x00]
249249
; CHECK: w0 = 0
250250
define dso_local i32 @test_atomic_or_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
251251
entry:

0 commit comments

Comments
 (0)