Skip to content

Commit 06c531e

Browse files
BPF: Generate locked insn for __sync_fetch_and_add() with cpu v1/v2 (#106494)
This patch contains two pars: - first to revert the patch #101428. - second to remove `atomic_fetch_and_*()` to `atomic_<op>()` conversion (when return value is not used), but preserve `__sync_fetch_and_add()` to locked insn with cpu v1/v2.
1 parent 57fe53c commit 06c531e

File tree

10 files changed

+280
-63
lines changed

10 files changed

+280
-63
lines changed

llvm/lib/Target/BPF/BPF.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
2828
FunctionPass *createBPFMISimplifyPatchablePass();
2929
FunctionPass *createBPFMIPeepholePass();
3030
FunctionPass *createBPFMIPreEmitPeepholePass();
31+
FunctionPass *createBPFMIPreEmitCheckingPass();
3132

3233
InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &,
3334
const BPFSubtarget &,
@@ -36,6 +37,7 @@ InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &,
3637
void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
3738
void initializeBPFDAGToDAGISelLegacyPass(PassRegistry &);
3839
void initializeBPFMIPeepholePass(PassRegistry &);
40+
void initializeBPFMIPreEmitCheckingPass(PassRegistry &);
3941
void initializeBPFMIPreEmitPeepholePass(PassRegistry &);
4042
void initializeBPFMISimplifyPatchablePass(PassRegistry &);
4143

llvm/lib/Target/BPF/BPFInstrInfo.td

Lines changed: 23 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -786,45 +786,13 @@ let Predicates = [BPFNoALU32] in {
786786
def : Pat<(i64 (extloadi32 ADDRri:$src)), (i64 (LDW ADDRri:$src))>;
787787
}
788788

789-
// Atomic XADD for BPFNoALU32
790-
class XADD<BPFWidthModifer SizeOp, string OpcodeStr>
791-
: TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
792-
(outs GPR:$dst),
793-
(ins MEMri:$addr, GPR:$val),
794-
"lock *("#OpcodeStr#" *)($addr) += $val",
795-
[]> {
796-
bits<4> dst;
797-
bits<20> addr;
798-
799-
let Inst{51-48} = addr{19-16}; // base reg
800-
let Inst{55-52} = dst;
801-
let Inst{47-32} = addr{15-0}; // offset
802-
let Inst{7-4} = BPF_ADD.Value;
803-
let BPFClass = BPF_STX;
804-
}
805-
806789
// Atomic add, and, or, xor
807-
class ATOMIC_NOFETCH<BPFArithOp Opc, string Opstr>
808-
: TYPE_LD_ST<BPF_ATOMIC.Value, BPF_DW.Value,
790+
class ATOMIC_NOFETCH<BPFWidthModifer SizeOp, string OpType, RegisterClass RegTp,
791+
BPFArithOp Opc, string Opstr>
792+
: TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
809793
(outs GPR:$dst),
810-
(ins MEMri:$addr, GPR:$val),
811-
"lock *(u64 *)($addr) " #Opstr# "= $val",
812-
[]> {
813-
bits<4> dst;
814-
bits<20> addr;
815-
816-
let Inst{51-48} = addr{19-16}; // base reg
817-
let Inst{55-52} = dst;
818-
let Inst{47-32} = addr{15-0}; // offset
819-
let Inst{7-4} = Opc.Value;
820-
let BPFClass = BPF_STX;
821-
}
822-
823-
class ATOMIC32_NOFETCH<BPFArithOp Opc, string Opstr>
824-
: TYPE_LD_ST<BPF_ATOMIC.Value, BPF_W.Value,
825-
(outs GPR32:$dst),
826-
(ins MEMri:$addr, GPR32:$val),
827-
"lock *(u32 *)($addr) " #Opstr# "= $val",
794+
(ins MEMri:$addr, RegTp:$val),
795+
"lock *(" #OpType# " *)($addr) " #Opstr# "= $val",
828796
[]> {
829797
bits<4> dst;
830798
bits<20> addr;
@@ -838,16 +806,23 @@ class ATOMIC32_NOFETCH<BPFArithOp Opc, string Opstr>
838806

839807
let Constraints = "$dst = $val" in {
840808
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
841-
def XADDW32 : ATOMIC32_NOFETCH<BPF_ADD, "+">;
842-
def XANDW32 : ATOMIC32_NOFETCH<BPF_AND, "&">;
843-
def XORW32 : ATOMIC32_NOFETCH<BPF_OR, "|">;
844-
def XXORW32 : ATOMIC32_NOFETCH<BPF_XOR, "^">;
809+
def XADDW32 : ATOMIC_NOFETCH<BPF_W, "u32", GPR32, BPF_ADD, "+">;
810+
def XANDW32 : ATOMIC_NOFETCH<BPF_W, "u32", GPR32, BPF_AND, "&">;
811+
def XORW32 : ATOMIC_NOFETCH<BPF_W, "u32", GPR32, BPF_OR, "|">;
812+
def XXORW32 : ATOMIC_NOFETCH<BPF_W, "u32", GPR32, BPF_XOR, "^">;
845813
}
814+
def XADDW : ATOMIC_NOFETCH<BPF_W, "u32", GPR, BPF_ADD, "+">;
815+
def XADDD : ATOMIC_NOFETCH<BPF_DW, "u64", GPR, BPF_ADD, "+">;
816+
def XANDD : ATOMIC_NOFETCH<BPF_DW, "u64", GPR, BPF_AND, "&">;
817+
def XORD : ATOMIC_NOFETCH<BPF_DW, "u64", GPR, BPF_OR, "|">;
818+
def XXORD : ATOMIC_NOFETCH<BPF_DW, "u64", GPR, BPF_XOR, "^">;
819+
}
846820

847-
def XADDD : ATOMIC_NOFETCH<BPF_ADD, "+">;
848-
def XANDD : ATOMIC_NOFETCH<BPF_AND, "&">;
849-
def XORD : ATOMIC_NOFETCH<BPF_OR, "|">;
850-
def XXORD : ATOMIC_NOFETCH<BPF_XOR, "^">;
821+
let Predicates = [BPFNoALU32] in {
822+
def : Pat<(atomic_load_add_i32 ADDRri:$addr, GPR:$val),
823+
(XADDW ADDRri:$addr, GPR:$val)>;
824+
def : Pat<(atomic_load_add_i64 ADDRri:$addr, GPR:$val),
825+
(XADDD ADDRri:$addr, GPR:$val)>;
851826
}
852827

853828
// Atomic Fetch-and-<add, and, or, xor> operations
@@ -887,13 +862,6 @@ class XFALU32<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr,
887862
let BPFClass = BPF_STX;
888863
}
889864

890-
let Constraints = "$dst = $val" in {
891-
let Predicates = [BPFNoALU32] in {
892-
def XADDW : XADD<BPF_W, "u32">;
893-
def XFADDW : XFALU64<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_i32>;
894-
}
895-
}
896-
897865
let Constraints = "$dst = $val" in {
898866
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
899867
def XFADDW32 : XFALU32<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_i32>;
@@ -902,7 +870,9 @@ let Constraints = "$dst = $val" in {
902870
def XFXORW32 : XFALU32<BPF_W, BPF_XOR, "u32", "xor", atomic_load_xor_i32>;
903871
}
904872

905-
def XFADDD : XFALU64<BPF_DW, BPF_ADD, "u64", "add", atomic_load_add_i64>;
873+
let Predicates = [BPFHasALU32] in {
874+
def XFADDD : XFALU64<BPF_DW, BPF_ADD, "u64", "add", atomic_load_add_i64>;
875+
}
906876
def XFANDD : XFALU64<BPF_DW, BPF_AND, "u64", "and", atomic_load_and_i64>;
907877
def XFORD : XFALU64<BPF_DW, BPF_OR, "u64", "or", atomic_load_or_i64>;
908878
def XFXORD : XFALU64<BPF_DW, BPF_XOR, "u64", "xor", atomic_load_xor_i64>;

llvm/lib/Target/BPF/BPFMIChecking.cpp

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
//===-------------- BPFMIChecking.cpp - MI Checking Legality -------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass performs checking to signal errors for certain illegal usages at
10+
// MachineInstruction layer. Specially, the result of XADD{32,64} insn should
11+
// not be used. The pass is done at the PreEmit pass right before the
12+
// machine code is emitted at which point the register liveness information
13+
// is still available.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
17+
#include "BPF.h"
18+
#include "BPFInstrInfo.h"
19+
#include "BPFTargetMachine.h"
20+
#include "llvm/CodeGen/MachineFunctionPass.h"
21+
#include "llvm/CodeGen/MachineInstrBuilder.h"
22+
#include "llvm/CodeGen/MachineRegisterInfo.h"
23+
#include "llvm/IR/DiagnosticInfo.h"
24+
#include "llvm/Support/Debug.h"
25+
26+
using namespace llvm;
27+
28+
#define DEBUG_TYPE "bpf-mi-checking"
29+
30+
namespace {
31+
32+
struct BPFMIPreEmitChecking : public MachineFunctionPass {
33+
34+
static char ID;
35+
MachineFunction *MF;
36+
const TargetRegisterInfo *TRI;
37+
38+
BPFMIPreEmitChecking() : MachineFunctionPass(ID) {
39+
initializeBPFMIPreEmitCheckingPass(*PassRegistry::getPassRegistry());
40+
}
41+
42+
private:
43+
// Initialize class variables.
44+
void initialize(MachineFunction &MFParm);
45+
46+
void processAtomicInsts();
47+
48+
public:
49+
// Main entry point for this pass.
50+
bool runOnMachineFunction(MachineFunction &MF) override {
51+
if (!skipFunction(MF.getFunction())) {
52+
initialize(MF);
53+
processAtomicInsts();
54+
}
55+
return false;
56+
}
57+
};
58+
59+
// Initialize class variables.
60+
void BPFMIPreEmitChecking::initialize(MachineFunction &MFParm) {
61+
MF = &MFParm;
62+
TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
63+
LLVM_DEBUG(dbgs() << "*** BPF PreEmit checking pass ***\n\n");
64+
}
65+
66+
// Make sure all Defs of XADD are dead, meaning any result of XADD insn is not
67+
// used.
68+
//
69+
// NOTE: BPF backend hasn't enabled sub-register liveness track, so when the
70+
// source and destination operands of XADD are GPR32, there is no sub-register
71+
// dead info. If we rely on the generic MachineInstr::allDefsAreDead, then we
72+
// will raise false alarm on GPR32 Def.
73+
//
74+
// To support GPR32 Def, ideally we could just enable sub-registr liveness track
75+
// on BPF backend, then allDefsAreDead could work on GPR32 Def. This requires
76+
// implementing TargetSubtargetInfo::enableSubRegLiveness on BPF.
77+
//
78+
// However, sub-register liveness tracking module inside LLVM is actually
79+
// designed for the situation where one register could be split into more than
80+
// one sub-registers for which case each sub-register could have their own
81+
// liveness and kill one of them doesn't kill others. So, tracking liveness for
82+
// each make sense.
83+
//
84+
// For BPF, each 64-bit register could only have one 32-bit sub-register. This
85+
// is exactly the case which LLVM think brings no benefits for doing
86+
// sub-register tracking, because the live range of sub-register must always
87+
// equal to its parent register, therefore liveness tracking is disabled even
88+
// the back-end has implemented enableSubRegLiveness. The detailed information
89+
// is at r232695:
90+
//
91+
// Author: Matthias Braun <[email protected]>
92+
// Date: Thu Mar 19 00:21:58 2015 +0000
93+
// Do not track subregister liveness when it brings no benefits
94+
//
95+
// Hence, for BPF, we enhance MachineInstr::allDefsAreDead. Given the solo
96+
// sub-register always has the same liveness as its parent register, LLVM is
97+
// already attaching a implicit 64-bit register Def whenever the there is
98+
// a sub-register Def. The liveness of the implicit 64-bit Def is available.
99+
// For example, for "lock *(u32 *)(r0 + 4) += w9", the MachineOperand info could
100+
// be:
101+
//
102+
// $w9 = XADDW32 killed $r0, 4, $w9(tied-def 0),
103+
// implicit killed $r9, implicit-def dead $r9
104+
//
105+
// Even though w9 is not marked as Dead, the parent register r9 is marked as
106+
// Dead correctly, and it is safe to use such information or our purpose.
107+
static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
108+
const MCRegisterClass *GPR64RegClass =
109+
&BPFMCRegisterClasses[BPF::GPRRegClassID];
110+
std::vector<unsigned> GPR32LiveDefs;
111+
std::vector<unsigned> GPR64DeadDefs;
112+
113+
for (const MachineOperand &MO : MI.operands()) {
114+
bool RegIsGPR64;
115+
116+
if (!MO.isReg() || MO.isUse())
117+
continue;
118+
119+
RegIsGPR64 = GPR64RegClass->contains(MO.getReg());
120+
if (!MO.isDead()) {
121+
// It is a GPR64 live Def, we are sure it is live. */
122+
if (RegIsGPR64)
123+
return true;
124+
// It is a GPR32 live Def, we are unsure whether it is really dead due to
125+
// no sub-register liveness tracking. Push it to vector for deferred
126+
// check.
127+
GPR32LiveDefs.push_back(MO.getReg());
128+
continue;
129+
}
130+
131+
// Record any GPR64 dead Def as some unmarked GPR32 could be alias of its
132+
// low 32-bit.
133+
if (RegIsGPR64)
134+
GPR64DeadDefs.push_back(MO.getReg());
135+
}
136+
137+
// No GPR32 live Def, safe to return false.
138+
if (GPR32LiveDefs.empty())
139+
return false;
140+
141+
// No GPR64 dead Def, so all those GPR32 live Def can't have alias, therefore
142+
// must be truely live, safe to return true.
143+
if (GPR64DeadDefs.empty())
144+
return true;
145+
146+
// Otherwise, return true if any aliased SuperReg of GPR32 is not dead.
147+
for (auto I : GPR32LiveDefs)
148+
for (MCPhysReg SR : TRI->superregs(I))
149+
if (!llvm::is_contained(GPR64DeadDefs, SR))
150+
return true;
151+
152+
return false;
153+
}
154+
155+
void BPFMIPreEmitChecking::processAtomicInsts() {
156+
for (MachineBasicBlock &MBB : *MF) {
157+
for (MachineInstr &MI : MBB) {
158+
if (MI.getOpcode() != BPF::XADDW && MI.getOpcode() != BPF::XADDD)
159+
continue;
160+
161+
LLVM_DEBUG(MI.dump());
162+
if (hasLiveDefs(MI, TRI)) {
163+
DebugLoc Empty;
164+
const DebugLoc &DL = MI.getDebugLoc();
165+
const Function &F = MF->getFunction();
166+
F.getContext().diagnose(DiagnosticInfoUnsupported{
167+
F, "Invalid usage of the XADD return value", DL});
168+
}
169+
}
170+
}
171+
}
172+
173+
} // namespace
174+
175+
INITIALIZE_PASS(BPFMIPreEmitChecking, "bpf-mi-pemit-checking",
176+
"BPF PreEmit Checking", false, false)
177+
178+
char BPFMIPreEmitChecking::ID = 0;
179+
FunctionPass *llvm::createBPFMIPreEmitCheckingPass() {
180+
return new BPFMIPreEmitChecking();
181+
}

llvm/lib/Target/BPF/BPFTargetMachine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ void BPFPassConfig::addMachineSSAOptimization() {
178178
}
179179

180180
void BPFPassConfig::addPreEmitPass() {
181+
addPass(createBPFMIPreEmitCheckingPass());
181182
if (getOptLevel() != CodeGenOptLevel::None)
182183
if (!DisableMIPeephole)
183184
addPass(createBPFMIPreEmitPeepholePass());

llvm/lib/Target/BPF/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ add_llvm_target(BPFCodeGen
3939
BPFSubtarget.cpp
4040
BPFTargetMachine.cpp
4141
BPFMIPeephole.cpp
42+
BPFMIChecking.cpp
4243
BPFMISimplifyPatchable.cpp
4344
BTFDebug.cpp
4445

llvm/test/CodeGen/BPF/atomics.ll

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,22 @@
1-
; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck --check-prefixes=CHECK,CHECK-V2 %s
2-
; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefixes=CHECK,CHECK-V3 %s
1+
; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck %s
2+
; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefix=CHECK-V3 %s
33

44
; CHECK-LABEL: test_load_add_32
5-
; CHECK-V2: r2 = atomic_fetch_add((u32 *)(r1 + 0), r2)
5+
; CHECK: lock *(u32 *)(r1 + 0) += r2
6+
; CHECK: encoding: [0xc3,0x21
67
; CHECK-V3: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
7-
; CHECK: encoding: [0xc3,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
8+
; CHECK-V3: encoding: [0xc3,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
89
define void @test_load_add_32(ptr %p, i32 zeroext %v) {
910
entry:
1011
atomicrmw add ptr %p, i32 %v seq_cst
1112
ret void
1213
}
1314

1415
; CHECK-LABEL: test_load_add_64
15-
; CHECK: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
16-
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
16+
; CHECK: lock *(u64 *)(r1 + 0) += r2
17+
; CHECK: encoding: [0xdb,0x21
18+
; CHECK-V3: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
19+
; CHECK-V3: encoding: [0xdb,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
1720
define void @test_load_add_64(ptr %p, i64 zeroext %v) {
1821
entry:
1922
atomicrmw add ptr %p, i64 %v seq_cst

llvm/test/CodeGen/BPF/atomics_2.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ entry:
224224
}
225225

226226
; CHECK-LABEL: test_atomic_xor_64
227-
; CHECK: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
227+
; CHECK: atomic_fetch_xor((u64 *)(r1 + 0), r2)
228228
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa1,0x00,0x00,0x00]
229229
; CHECK: w0 = 0
230230
define dso_local i32 @test_atomic_xor_64(ptr nocapture %p, i64 %v) local_unnamed_addr {

llvm/test/CodeGen/BPF/objdump_atomics.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
; CHECK-LABEL: test_load_add_32
44
; CHECK: c3 21
5-
; CHECK: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
5+
; CHECK: lock *(u32 *)(r1 + 0) += w2
66
define void @test_load_add_32(ptr %p, i32 zeroext %v) {
77
entry:
88
atomicrmw add ptr %p, i32 %v seq_cst
@@ -11,7 +11,7 @@ entry:
1111

1212
; CHECK-LABEL: test_load_add_64
1313
; CHECK: db 21
14-
; CHECK: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
14+
; CHECK: lock *(u64 *)(r1 + 0) += r2
1515
define void @test_load_add_64(ptr %p, i64 zeroext %v) {
1616
entry:
1717
atomicrmw add ptr %p, i64 %v seq_cst

0 commit comments

Comments
 (0)