Skip to content

Commit 08a89bb

Browse files
authored
[AArch64] Codegen for 16/32/64-bit floating-point atomicrmw ops (#125686)
Codegen for AArch64 16/32/64-bit floating-point atomic read-modify-write operations (`atomicrmw {fadd,fmin,fmax}`) using the LSFE atomic instructions LD{B}FADD, LD{B}FMAXNM and LD{B}FMINNM.
1 parent f7daa9d commit 08a89bb

File tree

6 files changed

+308
-1276
lines changed

6 files changed

+308
-1276
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,23 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
984984
#undef LCALLNAME5
985985
}
986986

987+
if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {
988+
setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f16, LibCall);
989+
setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f32, LibCall);
990+
setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::f64, LibCall);
991+
setOperationAction(ISD::ATOMIC_LOAD_FADD, MVT::bf16, LibCall);
992+
993+
setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f16, LibCall);
994+
setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f32, LibCall);
995+
setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::f64, LibCall);
996+
setOperationAction(ISD::ATOMIC_LOAD_FMAX, MVT::bf16, LibCall);
997+
998+
setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f16, LibCall);
999+
setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f32, LibCall);
1000+
setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::f64, LibCall);
1001+
setOperationAction(ISD::ATOMIC_LOAD_FMIN, MVT::bf16, LibCall);
1002+
}
1003+
9871004
if (Subtarget->hasLSE128()) {
9881005
// Custom lowering because i128 is not legal. Must be replaced by 2x64
9891006
// values. ATOMIC_LOAD_AND also needs op legalisation to emit LDCLRP.
@@ -27907,6 +27924,12 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2790727924
if (CanUseLSE128)
2790827925
return AtomicExpansionKind::None;
2790927926

27927+
// If LSFE available, use atomic FP instructions in preference to expansion
27928+
if (Subtarget->hasLSFE() && (AI->getOperation() == AtomicRMWInst::FAdd ||
27929+
AI->getOperation() == AtomicRMWInst::FMax ||
27930+
AI->getOperation() == AtomicRMWInst::FMin))
27931+
return AtomicExpansionKind::None;
27932+
2791027933
// Nand is not supported in LSE.
2791127934
// Leave 128 bits to LLSC or CmpXChg.
2791227935
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&

llvm/lib/Target/AArch64/AArch64InstrAtomics.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,20 @@ let Predicates = [HasLSE] in {
543543
defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
544544
}
545545

546+
// Instantiate binary_atomic_op_fp for each floating-point atomicrmw node.
// NOTE(review): binary_atomic_op_fp is defined outside this file; presumably
// it provides the per-type, per-ordering PatFrags that the LSFE pattern
// multiclasses look up by name -- confirm.
defm atomic_load_fadd : binary_atomic_op_fp<atomic_load_fadd>;
547+
defm atomic_load_fmin : binary_atomic_op_fp<atomic_load_fmin>;
548+
defm atomic_load_fmax : binary_atomic_op_fp<atomic_load_fmax>;
549+
550+
// Selection patterns for the LSFE atomic FP instructions. These only apply
// when the subtarget has LSFE.
let Predicates = [HasLSFE] in {
551+
// fmax/fmin are mapped to the LDFMAXNM/LDFMINNM instruction stems.
defm : LDFPOPregister_patterns<"LDFADD", "atomic_load_fadd">;
552+
defm : LDFPOPregister_patterns<"LDFMAXNM", "atomic_load_fmax">;
553+
defm : LDFPOPregister_patterns<"LDFMINNM", "atomic_load_fmin">;
554+
555+
// Same operations through the LDBF* instruction stems.
defm : LDBFPOPregister_patterns<"LDBFADD", "atomic_load_fadd">;
556+
defm : LDBFPOPregister_patterns<"LDBFMAXNM", "atomic_load_fmax">;
557+
defm : LDBFPOPregister_patterns<"LDBFMINNM", "atomic_load_fmin">;
558+
}
559+
546560
// v8.9a/v9.4a FEAT_LRCPC patterns
547561
let Predicates = [HasRCPC3, HasNEON] in {
548562
// LDAP1 loads

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12493,6 +12493,36 @@ multiclass LDOPregister_patterns_mod<string inst, string op, string mod> {
1249312493
(i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
1249412494
}
1249512495

12496+
// Selection patterns for one LSFE atomic FP load-op instruction family, with
// one pattern per memory ordering.
//   inst   - instruction name stem (e.g. "LDFADD"); the ordering letters and
//            `suffix` are appended to form the instruction record name.
//   suffix - size suffix appended after the ordering letters.
//   op     - PatFrag name stem; the fragment looked up is
//            op # "_" # vt # "_" # <ordering>.
//   vt     - value type of the data operand.
//   data   - dag for the data operand ($Rm).
// Ordering -> instruction variant: monotonic -> plain, acquire -> "A",
// release -> "L", acq_rel and seq_cst -> "AL".
// $Rn is the address operand, so it must be a general-purpose base register
// (GPR64sp), not an FP register; only the data operand lives in an FPR.
let Predicates = [HasLSFE] in
12497+
multiclass LDFPOPregister_patterns_ord_dag<string inst, string suffix, string op,
12498+
ValueType vt, dag data> {
12499+
def : Pat<(!cast<PatFrag>(op#"_"#vt#"_monotonic") GPR64sp:$Rn, data),
12500+
(!cast<Instruction>(inst # suffix) data, GPR64sp:$Rn)>;
12501+
def : Pat<(!cast<PatFrag>(op#"_"#vt#"_acquire") GPR64sp:$Rn, data),
12502+
(!cast<Instruction>(inst # "A" # suffix) data, GPR64sp:$Rn)>;
12503+
def : Pat<(!cast<PatFrag>(op#"_"#vt#"_release") GPR64sp:$Rn, data),
12504+
(!cast<Instruction>(inst # "L" # suffix) data, GPR64sp:$Rn)>;
12505+
def : Pat<(!cast<PatFrag>(op#"_"#vt#"_acq_rel") GPR64sp:$Rn, data),
12506+
(!cast<Instruction>(inst # "AL" # suffix) data, GPR64sp:$Rn)>;
12507+
def : Pat<(!cast<PatFrag>(op#"_"#vt#"_seq_cst") GPR64sp:$Rn, data),
12508+
(!cast<Instruction>(inst # "AL" # suffix) data, GPR64sp:$Rn)>;
12509+
}
12510+
12511+
// Thin wrapper: forwards all of its arguments unchanged to
// LDFPOPregister_patterns_ord_dag, passing RHS as the data-operand dag.
multiclass LDFPOPregister_patterns_ord<string inst, string suffix, string op,
12512+
ValueType vt, dag RHS> {
12513+
defm : LDFPOPregister_patterns_ord_dag<inst, suffix, op, vt, RHS>;
12514+
}
12515+
12516+
// Instantiates the per-ordering patterns for instruction stem `inst` and
// PatFrag stem `op` at three widths: f16 (suffix "H", data in FPR16),
// f32 (suffix "S", data in FPR32) and f64 (suffix "D", data in FPR64).
multiclass LDFPOPregister_patterns<string inst, string op> {
12517+
defm : LDFPOPregister_patterns_ord<inst, "H", op, f16, (f16 FPR16:$Rm)>;
12518+
defm : LDFPOPregister_patterns_ord<inst, "S", op, f32, (f32 FPR32:$Rm)>;
12519+
defm : LDFPOPregister_patterns_ord<inst, "D", op, f64, (f64 FPR64:$Rm)>;
12520+
}
12521+
12522+
// bf16 counterpart of LDFPOPregister_patterns: instantiates the per-ordering
// patterns for a bf16 data operand held in FPR16. The size suffix is empty,
// so the instruction record names are just `inst` followed by the ordering
// letters (e.g. inst, inst#"A", inst#"L", inst#"AL").
multiclass LDBFPOPregister_patterns<string inst, string op> {
12523+
defm : LDFPOPregister_patterns_ord<inst, "", op, bf16, (bf16 FPR16:$Rm)>;
12524+
}
12525+
1249612526
let Predicates = [HasLSE] in
1249712527
multiclass CASregister_patterns_ord_dag<string inst, string suffix, string op,
1249812528
ValueType vt, dag OLD, dag NEW> {

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10564,7 +10564,7 @@ let Predicates = [HasLSFE] in {
1056410564
defm LDFMAXNML : AtomicFPLoad<0b01, 0b110, "ldfmaxnml">;
1056510565
defm LDFMINNMA : AtomicFPLoad<0b10, 0b111, "ldfminnma">;
1056610566
defm LDFMINNMAL : AtomicFPLoad<0b11, 0b111, "ldfminnmal">;
10567-
defm LDFMINMN : AtomicFPLoad<0b00, 0b111, "ldfminnm">;
10567+
defm LDFMINNM : AtomicFPLoad<0b00, 0b111, "ldfminnm">;
1056810568
defm LDFMINNML : AtomicFPLoad<0b01, 0b111, "ldfminnml">;
1056910569
// BFloat16
1057010570
def LDBFADDA : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b000, "ldbfadda">;

0 commit comments

Comments
 (0)