Commit aec87a2
[llvm][mlir][flang][OpenMP] Emit __atomic_load and __atomic_compare_exchange libcalls for complex types in atomic update (#92364)
This patch adds functionality to emit the relevant libcalls when an atomicrmw instruction cannot be emitted (for instance, for complex types). The IRBuilder is modified to emit __atomic_load and __atomic_compare_exchange libcalls directly. The added functions follow a codegen path similar to Clang's, so LLVM Flang generates IR that closely matches what Clang produces.

Fixes #83760 and #75138

Co-authored-by: Michael Kruse <[email protected]>
1 parent 8a57d82 commit aec87a2
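For readers unfamiliar with the libcall path: when the element type cannot be an atomicrmw/cmpxchg operand (such as a { float, float } complex value), the emitted code takes an __atomic_load snapshot and then retries a compare-exchange until it succeeds. The C++ sketch below is only an illustrative analogue, not code from this patch; atomicAddOne is a hypothetical name, and the GCC/Clang __atomic builtins are used because, for types the target cannot handle inline, they lower to the same __atomic_load / __atomic_compare_exchange runtime calls that this patch now emits.

#include <complex>

// atomicAddOne is a hypothetical name used only for this illustration: it
// applies ia = ia + (1,1) following the same retry protocol as the
// generated code, since there is no atomicrmw form for an aggregate.
void atomicAddOne(std::complex<float> *ia) {
  std::complex<float> expected, desired;
  // Initial snapshot; the tests below use the relaxed ordering (i32 0) here.
  __atomic_load(ia, &expected, __ATOMIC_RELAXED);
  do {
    // Non-atomic update of the snapshot.
    desired = expected + std::complex<float>(1.0f, 1.0f);
    // On failure, `expected` is refreshed with the current value in memory,
    // so the next iteration retries against a fresh snapshot.
  } while (!__atomic_compare_exchange(ia, &expected, &desired,
                                      /*weak=*/false, __ATOMIC_ACQUIRE,
                                      __ATOMIC_ACQUIRE));
}

The expected value is passed by address so that, on a failed exchange, the runtime can hand back the freshly observed value and the loop can retry without a separate atomic reload.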

File tree: 12 files changed, +563 −28 lines

flang/lib/Lower/DirectivesCommon.h (+1, −2)

@@ -129,8 +129,7 @@ static void processOmpAtomicTODO(mlir::Type elementType,
   // Based on assertion for supported element types in OMPIRBuilder.cpp
   // createAtomicRead
   mlir::Type unwrappedEleTy = fir::unwrapRefType(elementType);
-  bool supportedAtomicType =
-      (!fir::isa_complex(unwrappedEleTy) && fir::isa_trivial(unwrappedEleTy));
+  bool supportedAtomicType = fir::isa_trivial(unwrappedEleTy);
   if (!supportedAtomicType)
     TODO(loc, "Unsupported atomic type");
 }
New file (+47): Fortran-to-LLVM-IR integration test for !$omp atomic capture of a complex value.

!===----------------------------------------------------------------------===!
! This directory can be used to add Integration tests involving multiple
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
! contain executable tests. We should only add tests here sparingly and only
! if there is no other way to test. Repeat this message in each test that is
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s

!CHECK: %[[X_NEW_VAL:.*]] = alloca { float, float }, align 8
!CHECK: %[[VAL_1:.*]] = alloca { float, float }, i64 1, align 8
!CHECK: %[[ORIG_VAL:.*]] = alloca { float, float }, i64 1, align 8
!CHECK: store { float, float } { float 2.000000e+00, float 2.000000e+00 }, ptr %[[ORIG_VAL]], align 4
!CHECK: br label %entry

!CHECK: entry:
!CHECK: %[[ATOMIC_TEMP_LOAD:.*]] = alloca { float, float }, align 8
!CHECK: call void @__atomic_load(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], i32 0)
!CHECK: %[[PHI_NODE_ENTRY_1:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 8
!CHECK: br label %.atomic.cont

!CHECK: .atomic.cont
!CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %entry ], [ %{{.*}}, %.atomic.cont ]
!CHECK: %[[VAL_5:.*]] = extractvalue { float, float } %[[VAL_4]], 0
!CHECK: %[[VAL_6:.*]] = extractvalue { float, float } %[[VAL_4]], 1
!CHECK: %[[VAL_7:.*]] = fadd contract float %[[VAL_5]], 1.000000e+00
!CHECK: %[[VAL_8:.*]] = fadd contract float %[[VAL_6]], 1.000000e+00
!CHECK: %[[VAL_9:.*]] = insertvalue { float, float } undef, float %[[VAL_7]], 0
!CHECK: %[[VAL_10:.*]] = insertvalue { float, float } %[[VAL_9]], float %[[VAL_8]], 1
!CHECK: store { float, float } %[[VAL_10]], ptr %[[X_NEW_VAL]], align 4
!CHECK: %[[VAL_11:.*]] = call i1 @__atomic_compare_exchange(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], ptr %[[X_NEW_VAL]],
!i32 2, i32 2)
!CHECK: %[[VAL_12:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 4
!CHECK: br i1 %[[VAL_11]], label %.atomic.exit, label %.atomic.cont

!CHECK: .atomic.exit
!CHECK: store { float, float } %[[VAL_10]], ptr %[[VAL_1]], align 4

program main
    complex*8 ia, ib
    ia = (2, 2)
    !$omp atomic capture
        ia = ia + (1, 1)
        ib = ia
    !$omp end atomic
end program
New file (+42): Fortran-to-LLVM-IR integration test for !$omp atomic update of a complex value.

!===----------------------------------------------------------------------===!
! This directory can be used to add Integration tests involving multiple
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
! contain executable tests. We should only add tests here sparingly and only
! if there is no other way to test. Repeat this message in each test that is
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s

!CHECK: define void @_QQmain() {
!CHECK: %[[X_NEW_VAL:.*]] = alloca { float, float }, align 8
!CHECK: {{.*}} = alloca { float, float }, i64 1, align 8
!CHECK: %[[ORIG_VAL:.*]] = alloca { float, float }, i64 1, align 8
!CHECK: store { float, float } { float 2.000000e+00, float 2.000000e+00 }, ptr %[[ORIG_VAL]], align 4
!CHECK: br label %entry

!CHECK: entry:
!CHECK: %[[ATOMIC_TEMP_LOAD:.*]] = alloca { float, float }, align 8
!CHECK: call void @__atomic_load(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], i32 0)
!CHECK: %[[PHI_NODE_ENTRY_1:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 8
!CHECK: br label %.atomic.cont

!CHECK: .atomic.cont
!CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %entry ], [ %{{.*}}, %.atomic.cont ]
!CHECK: %[[VAL_5:.*]] = extractvalue { float, float } %[[VAL_4]], 0
!CHECK: %[[VAL_6:.*]] = extractvalue { float, float } %[[VAL_4]], 1
!CHECK: %[[VAL_7:.*]] = fadd contract float %[[VAL_5]], 1.000000e+00
!CHECK: %[[VAL_8:.*]] = fadd contract float %[[VAL_6]], 1.000000e+00
!CHECK: %[[VAL_9:.*]] = insertvalue { float, float } undef, float %[[VAL_7]], 0
!CHECK: %[[VAL_10:.*]] = insertvalue { float, float } %[[VAL_9]], float %[[VAL_8]], 1
!CHECK: store { float, float } %[[VAL_10]], ptr %[[X_NEW_VAL]], align 4
!CHECK: %[[VAL_11:.*]] = call i1 @__atomic_compare_exchange(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], ptr %[[X_NEW_VAL]], i32 2, i32 2)
!CHECK: %[[VAL_12:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 4
!CHECK: br i1 %[[VAL_11]], label %.atomic.exit, label %.atomic.cont

program main
    complex*8 ia, ib
    ia = (2, 2)
    !$omp atomic update
        ia = ia + (1, 1)
    !$omp end atomic
end program

flang/test/Lower/OpenMP/Todo/atomic-complex.f90 (deleted, −8)

This file was deleted.
llvm/Frontend/Atomic/Atomic.h (new file, +232)

//===--- Atomic.h - Codegen of atomic operations
//---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_FRONTEND_ATOMIC_ATOMIC_H
#define LLVM_FRONTEND_ATOMIC_ATOMIC_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/RuntimeLibcalls.h"

namespace llvm {

template <typename IRBuilderTy> struct AtomicInfo {

  IRBuilderTy *Builder;
  Type *Ty;
  uint64_t AtomicSizeInBits;
  uint64_t ValueSizeInBits;
  llvm::Align AtomicAlign;
  llvm::Align ValueAlign;
  bool UseLibcall;

public:
  AtomicInfo(IRBuilderTy *Builder, Type *Ty, uint64_t AtomicSizeInBits,
             uint64_t ValueSizeInBits, llvm::Align AtomicAlign,
             llvm::Align ValueAlign, bool UseLibcall)
      : Builder(Builder), Ty(Ty), AtomicSizeInBits(AtomicSizeInBits),
        ValueSizeInBits(ValueSizeInBits), AtomicAlign(AtomicAlign),
        ValueAlign(ValueAlign), UseLibcall(UseLibcall) {}

  virtual ~AtomicInfo() = default;

  llvm::Align getAtomicAlignment() const { return AtomicAlign; }
  uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
  uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
  bool shouldUseLibcall() const { return UseLibcall; }
  llvm::Type *getAtomicTy() const { return Ty; }

  virtual llvm::Value *getAtomicPointer() const = 0;
  virtual void decorateWithTBAA(Instruction *I) = 0;
  virtual llvm::AllocaInst *CreateAlloca(llvm::Type *Ty,
                                         const llvm::Twine &Name) const = 0;

  /*
   * Is the atomic size larger than the underlying value type?
   * Note that the absence of padding does not mean that atomic
   * objects are completely interchangeable with non-atomic
   * objects: we might have promoted the alignment of a type
   * without making it bigger.
   */
  bool hasPadding() const { return (ValueSizeInBits != AtomicSizeInBits); }

  LLVMContext &getLLVMContext() const { return Builder->getContext(); }

  static bool shouldCastToInt(llvm::Type *ValTy, bool CmpXchg) {
    if (ValTy->isFloatingPointTy())
      return ValTy->isX86_FP80Ty() || CmpXchg;
    return !ValTy->isIntegerTy() && !ValTy->isPointerTy();
  }

  llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile,
                                bool CmpXchg = false) {
    Value *Ptr = getAtomicPointer();
    Type *AtomicTy = Ty;
    if (shouldCastToInt(Ty, CmpXchg))
      AtomicTy = llvm::IntegerType::get(getLLVMContext(), AtomicSizeInBits);
    LoadInst *Load =
        Builder->CreateAlignedLoad(AtomicTy, Ptr, AtomicAlign, "atomic-load");
    Load->setAtomic(AO);
    if (IsVolatile)
      Load->setVolatile(true);
    decorateWithTBAA(Load);
    return Load;
  }

  static CallInst *EmitAtomicLibcall(IRBuilderTy *Builder, StringRef fnName,
                                     Type *ResultType, ArrayRef<Value *> Args) {
    LLVMContext &ctx = Builder->getContext();
    SmallVector<Type *, 6> ArgTys;
    for (Value *Arg : Args)
      ArgTys.push_back(Arg->getType());
    FunctionType *FnType = FunctionType::get(ResultType, ArgTys, false);
    Module *M = Builder->GetInsertBlock()->getModule();

    // TODO: Use llvm::TargetLowering for Libcall ABI
    llvm::AttrBuilder fnAttrBuilder(ctx);
    fnAttrBuilder.addAttribute(llvm::Attribute::NoUnwind);
    fnAttrBuilder.addAttribute(llvm::Attribute::WillReturn);
    llvm::AttributeList fnAttrs = llvm::AttributeList::get(
        ctx, llvm::AttributeList::FunctionIndex, fnAttrBuilder);
    FunctionCallee LibcallFn = M->getOrInsertFunction(fnName, FnType, fnAttrs);
    CallInst *Call = Builder->CreateCall(LibcallFn, Args);
    return Call;
  }

  llvm::Value *getAtomicSizeValue() const {
    LLVMContext &ctx = getLLVMContext();

    // TODO: Get from llvm::TargetMachine / clang::TargetInfo
    // if clang shares this codegen in future
    constexpr uint16_t SizeTBits = 64;
    constexpr uint16_t BitsPerByte = 8;
    return llvm::ConstantInt::get(llvm::IntegerType::get(ctx, SizeTBits),
                                  AtomicSizeInBits / BitsPerByte);
  }

  std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeLibcall(
      llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
      llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure) {
    LLVMContext &ctx = getLLVMContext();

    // __atomic_compare_exchange's expected and desired are passed by pointers
    // FIXME: types

    // TODO: Get from llvm::TargetMachine / clang::TargetInfo
    // if clang shares this codegen in future
    constexpr uint64_t IntBits = 32;

    // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
    //                                void *desired, int success, int failure);
    llvm::Value *Args[6] = {
        getAtomicSizeValue(),
        getAtomicPointer(),
        ExpectedVal,
        DesiredVal,
        llvm::Constant::getIntegerValue(
            llvm::IntegerType::get(ctx, IntBits),
            llvm::APInt(IntBits, static_cast<uint64_t>(Success),
                        /*signed=*/true)),
        llvm::Constant::getIntegerValue(
            llvm::IntegerType::get(ctx, IntBits),
            llvm::APInt(IntBits, static_cast<uint64_t>(Failure),
                        /*signed=*/true)),
    };
    auto Result = EmitAtomicLibcall(Builder, "__atomic_compare_exchange",
                                    llvm::IntegerType::getInt1Ty(ctx), Args);
    return std::make_pair(ExpectedVal, Result);
  }

  Value *castToAtomicIntPointer(Value *addr) const {
    return addr; // opaque pointer
  }

  Value *getAtomicAddressAsAtomicIntPointer() const {
    return castToAtomicIntPointer(getAtomicPointer());
  }

  std::pair<llvm::Value *, llvm::Value *>
  EmitAtomicCompareExchangeOp(llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
                              llvm::AtomicOrdering Success,
                              llvm::AtomicOrdering Failure,
                              bool IsVolatile = false, bool IsWeak = false) {
    // Do the atomic store.
    Value *Addr = getAtomicAddressAsAtomicIntPointer();
    auto *Inst = Builder->CreateAtomicCmpXchg(Addr, ExpectedVal, DesiredVal,
                                              getAtomicAlignment(), Success,
                                              Failure, llvm::SyncScope::System);
    // Other decoration.
    Inst->setVolatile(IsVolatile);
    Inst->setWeak(IsWeak);

    auto *PreviousVal = Builder->CreateExtractValue(Inst, /*Idxs=*/0);
    auto *SuccessFailureVal = Builder->CreateExtractValue(Inst, /*Idxs=*/1);
    return std::make_pair(PreviousVal, SuccessFailureVal);
  }

  std::pair<llvm::Value *, llvm::Value *>
  EmitAtomicCompareExchange(llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
                            llvm::AtomicOrdering Success,
                            llvm::AtomicOrdering Failure, bool IsVolatile,
                            bool IsWeak) {
    if (shouldUseLibcall())
      return EmitAtomicCompareExchangeLibcall(ExpectedVal, DesiredVal, Success,
                                              Failure);

    auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success,
                                           Failure, IsVolatile, IsWeak);
    return Res;
  }

  // void __atomic_load(size_t size, void *mem, void *return, int order);
  std::pair<llvm::LoadInst *, llvm::AllocaInst *>
  EmitAtomicLoadLibcall(llvm::AtomicOrdering AO) {
    LLVMContext &Ctx = getLLVMContext();
    Type *SizedIntTy = Type::getIntNTy(Ctx, getAtomicSizeInBits());
    Type *ResultTy;
    SmallVector<Value *, 6> Args;
    AttributeList Attr;
    Module *M = Builder->GetInsertBlock()->getModule();
    const DataLayout &DL = M->getDataLayout();
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx),
                                    this->getAtomicSizeInBits() / 8));

    Value *PtrVal = getAtomicPointer();
    PtrVal = Builder->CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
    Args.push_back(PtrVal);
    AllocaInst *AllocaResult =
        CreateAlloca(Ty, getAtomicPointer()->getName() + "atomic.temp.load");
    const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
    AllocaResult->setAlignment(AllocaAlignment);
    Args.push_back(AllocaResult);
    Constant *OrderingVal =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(AO));
    Args.push_back(OrderingVal);

    ResultTy = Type::getVoidTy(Ctx);
    SmallVector<Type *, 6> ArgTys;
    for (Value *Arg : Args)
      ArgTys.push_back(Arg->getType());
    FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
    FunctionCallee LibcallFn =
        M->getOrInsertFunction("__atomic_load", FnType, Attr);
    CallInst *Call = Builder->CreateCall(LibcallFn, Args);
    Call->setAttributes(Attr);
    return std::make_pair(
        Builder->CreateAlignedLoad(Ty, AllocaResult, AllocaAlignment),
        AllocaResult);
  }
};
} // end namespace llvm

#endif /* LLVM_FRONTEND_ATOMIC_ATOMIC_H */
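To illustrate how a frontend is meant to drive this header, here is a minimal hypothetical sketch, not code from this commit: emitLibcallAtomicUpdate is an invented name, and the real OpenMP lowering (presumably in OMPIRBuilder.cpp, not shown in this excerpt) additionally handles insertion points, volatility, and the capture form. It assumes a concrete AtomicInfo subclass constructed with UseLibcall = true (such as OpenMPIRBuilder::AtomicInfo declared below) and an UpdateOp callback that emits straight-line code computing the new value from the old one.

#include "llvm/ADT/STLExtras.h"
#include "llvm/Frontend/Atomic/Atomic.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Invented helper: build the load / compare-exchange retry loop for an
// element type that atomicrmw cannot handle (e.g. { float, float }).
static void emitLibcallAtomicUpdate(IRBuilder<> &B,
                                    AtomicInfo<IRBuilder<>> &Info,
                                    function_ref<Value *(Value *)> UpdateOp) {
  LLVMContext &Ctx = B.getContext();
  Function *F = B.GetInsertBlock()->getParent();
  Type *Ty = Info.getAtomicTy();

  // Snapshot the variable through void __atomic_load(size, obj, ret, order).
  // The returned alloca doubles as the "expected" slot for the CAS below.
  auto [InitVal, ExpectedAddr] =
      Info.EmitAtomicLoadLibcall(AtomicOrdering::Monotonic);

  BasicBlock *EntryBB = B.GetInsertBlock();
  BasicBlock *ContBB = BasicBlock::Create(Ctx, ".atomic.cont", F);
  BasicBlock *ExitBB = BasicBlock::Create(Ctx, ".atomic.exit", F);
  B.CreateBr(ContBB);

  B.SetInsertPoint(ContBB);
  PHINode *OldVal = B.CreatePHI(Ty, /*NumReservedValues=*/2);
  OldVal->addIncoming(InitVal, EntryBB);

  // Compute the new value non-atomically and spill it for the libcall.
  // (A real implementation would hoist this alloca to the entry block.)
  Value *NewVal = UpdateOp(OldVal);
  AllocaInst *DesiredAddr = B.CreateAlloca(Ty, nullptr, "x.new.val");
  B.CreateStore(NewVal, DesiredAddr);

  // bool __atomic_compare_exchange(size, obj, expected, desired, succ, fail).
  // On failure the runtime refreshes *ExpectedAddr with the current value.
  auto Result = Info.EmitAtomicCompareExchangeLibcall(
      ExpectedAddr, DesiredAddr, AtomicOrdering::Acquire,
      AtomicOrdering::Acquire);
  Value *Retry = B.CreateLoad(Ty, ExpectedAddr);
  OldVal->addIncoming(Retry, B.GetInsertBlock());
  B.CreateCondBr(Result.second, ExitBB, ContBB);

  B.SetInsertPoint(ExitBB);
}

The expected and desired operands are passed as allocas because the generic __atomic_* entry points operate on memory of a runtime-supplied size rather than on SSA values.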

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+31)

@@ -15,6 +15,7 @@
 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H

 #include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Frontend/Atomic/Atomic.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/DebugLoc.h"

@@ -479,6 +480,27 @@ class OpenMPIRBuilder {
         T(Triple(M.getTargetTriple())) {}
   ~OpenMPIRBuilder();

+  class AtomicInfo : public llvm::AtomicInfo<IRBuilder<>> {
+    llvm::Value *AtomicVar;
+
+  public:
+    AtomicInfo(IRBuilder<> *Builder, llvm::Type *Ty, uint64_t AtomicSizeInBits,
+               uint64_t ValueSizeInBits, llvm::Align AtomicAlign,
+               llvm::Align ValueAlign, bool UseLibcall, llvm::Value *AtomicVar)
+        : llvm::AtomicInfo<IRBuilder<>>(Builder, Ty, AtomicSizeInBits,
+                                        ValueSizeInBits, AtomicAlign,
+                                        ValueAlign, UseLibcall),
+          AtomicVar(AtomicVar) {}
+
+    llvm::Value *getAtomicPointer() const override { return AtomicVar; }
+    void decorateWithTBAA(llvm::Instruction *I) override {}
+    llvm::AllocaInst *CreateAlloca(llvm::Type *Ty,
+                                   const llvm::Twine &Name) const override {
+      llvm::AllocaInst *allocaInst = Builder->CreateAlloca(Ty);
+      allocaInst->setName(Name);
+      return allocaInst;
+    }
+  };
   /// Initialize the internal state, this will put structures types and
   /// potentially other helpers into the underlying module. Must be called
   /// before any other method and only once! This internal state includes types

@@ -3039,6 +3061,15 @@ class OpenMPIRBuilder {
                     AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
                     bool IsXBinopExpr);

+  std::pair<llvm::LoadInst *, llvm::AllocaInst *>
+  EmitAtomicLoadLibcall(Value *X, Type *XElemTy, llvm::AtomicOrdering AO,
+                        uint64_t AtomicSizeInBits);
+
+  std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeLibcall(
+      Value *X, Type *XElemTy, uint64_t AtomicSizeInBits,
+      llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
+      llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure);
+
   /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
   ///
   /// \Return The instruction
