Skip to content

Commit 9fef09f

Browse files
authored
[Clang][CodeGen] Fix type for atomic float incdec operators (#107075)
`llvm::ConstantFP::get(llvm::LLVMContext&, APFloat(float))` always returns an f32 constant. Fix #107054.
1 parent 8b28e2e commit 9fef09f

File tree

4 files changed

+339
-179
lines changed

4 files changed

+339
-179
lines changed

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2865,19 +2865,22 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
28652865
llvm::AtomicOrdering::SequentiallyConsistent);
28662866
return isPre ? Builder.CreateBinOp(op, old, amt) : old;
28672867
}
2868-
// Special case for atomic increment/decrement on floats
2868+
// Special case for atomic increment/decrement on floats.
2869+
// Bail out on non-power-of-2-sized floating point types (e.g., x86_fp80).
28692870
if (type->isFloatingType()) {
2870-
llvm::AtomicRMWInst::BinOp aop =
2871-
isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub;
2872-
llvm::Instruction::BinaryOps op =
2873-
isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
2874-
llvm::Value *amt = llvm::ConstantFP::get(
2875-
VMContext, llvm::APFloat(static_cast<float>(1.0)));
2876-
llvm::AtomicRMWInst *old =
2877-
CGF.emitAtomicRMWInst(aop, LV.getAddress(), amt,
2878-
llvm::AtomicOrdering::SequentiallyConsistent);
2871+
llvm::Type *Ty = ConvertType(type);
2872+
if (llvm::has_single_bit(Ty->getScalarSizeInBits())) {
2873+
llvm::AtomicRMWInst::BinOp aop =
2874+
isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub;
2875+
llvm::Instruction::BinaryOps op =
2876+
isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
2877+
llvm::Value *amt = llvm::ConstantFP::get(Ty, 1.0);
2878+
llvm::AtomicRMWInst *old =
2879+
CGF.emitAtomicRMWInst(aop, LV.getAddress(), amt,
2880+
llvm::AtomicOrdering::SequentiallyConsistent);
28792881

2880-
return isPre ? Builder.CreateBinOp(op, old, amt) : old;
2882+
return isPre ? Builder.CreateBinOp(op, old, amt) : old;
2883+
}
28812884
}
28822885
value = EmitLoadOfLValue(LV, E->getExprLoc());
28832886
input = value;

clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c

Lines changed: 40 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -99,20 +99,16 @@ float test_float_pre_inc()
9999
// SAFE-NEXT: [[ENTRY:.*:]]
100100
// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
101101
// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
102-
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8
103-
// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
104-
// SAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
105-
// SAFE-NEXT: ret double [[TMP1]]
102+
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), double 1.000000e+00 seq_cst, align 8
103+
// SAFE-NEXT: ret double [[TMP0]]
106104
//
107105
// UNSAFE-LABEL: define dso_local double @test_double_post_inc(
108106
// UNSAFE-SAME: ) #[[ATTR0]] {
109107
// UNSAFE-NEXT: [[ENTRY:.*:]]
110108
// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
111109
// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
112-
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
113-
// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
114-
// UNSAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
115-
// UNSAFE-NEXT: ret double [[TMP1]]
110+
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
111+
// UNSAFE-NEXT: ret double [[TMP0]]
116112
//
117113
double test_double_post_inc()
118114
{
@@ -125,20 +121,16 @@ double test_double_post_inc()
125121
// SAFE-NEXT: [[ENTRY:.*:]]
126122
// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
127123
// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
128-
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8
129-
// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
130-
// SAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
131-
// SAFE-NEXT: ret double [[TMP1]]
124+
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), double 1.000000e+00 seq_cst, align 8
125+
// SAFE-NEXT: ret double [[TMP0]]
132126
//
133127
// UNSAFE-LABEL: define dso_local double @test_double_post_dc(
134128
// UNSAFE-SAME: ) #[[ATTR0]] {
135129
// UNSAFE-NEXT: [[ENTRY:.*:]]
136130
// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
137131
// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
138-
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
139-
// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
140-
// UNSAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
141-
// UNSAFE-NEXT: ret double [[TMP1]]
132+
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
133+
// UNSAFE-NEXT: ret double [[TMP0]]
142134
//
143135
double test_double_post_dc()
144136
{
@@ -151,22 +143,18 @@ double test_double_post_dc()
151143
// SAFE-NEXT: [[ENTRY:.*:]]
152144
// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
153145
// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
154-
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8
155-
// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
156-
// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
157-
// SAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
158-
// SAFE-NEXT: ret double [[TMP2]]
146+
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), double 1.000000e+00 seq_cst, align 8
147+
// SAFE-NEXT: [[TMP1:%.*]] = fsub double [[TMP0]], 1.000000e+00
148+
// SAFE-NEXT: ret double [[TMP1]]
159149
//
160150
// UNSAFE-LABEL: define dso_local double @test_double_pre_dc(
161151
// UNSAFE-SAME: ) #[[ATTR0]] {
162152
// UNSAFE-NEXT: [[ENTRY:.*:]]
163153
// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
164154
// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
165-
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
166-
// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
167-
// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
168-
// UNSAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
169-
// UNSAFE-NEXT: ret double [[TMP2]]
155+
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
156+
// UNSAFE-NEXT: [[TMP1:%.*]] = fsub double [[TMP0]], 1.000000e+00
157+
// UNSAFE-NEXT: ret double [[TMP1]]
170158
//
171159
double test_double_pre_dc()
172160
{
@@ -179,22 +167,18 @@ double test_double_pre_dc()
179167
// SAFE-NEXT: [[ENTRY:.*:]]
180168
// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
181169
// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
182-
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8
183-
// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
184-
// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
185-
// SAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
186-
// SAFE-NEXT: ret double [[TMP2]]
170+
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), double 1.000000e+00 seq_cst, align 8
171+
// SAFE-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
172+
// SAFE-NEXT: ret double [[TMP1]]
187173
//
188174
// UNSAFE-LABEL: define dso_local double @test_double_pre_inc(
189175
// UNSAFE-SAME: ) #[[ATTR0]] {
190176
// UNSAFE-NEXT: [[ENTRY:.*:]]
191177
// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
192178
// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
193-
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
194-
// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
195-
// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
196-
// UNSAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
197-
// UNSAFE-NEXT: ret double [[TMP2]]
179+
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), double 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
180+
// UNSAFE-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
181+
// UNSAFE-NEXT: ret double [[TMP1]]
198182
//
199183
double test_double_pre_inc()
200184
{
@@ -207,20 +191,16 @@ double test_double_pre_inc()
207191
// SAFE-NEXT: [[ENTRY:.*:]]
208192
// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
209193
// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
210-
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2
211-
// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2
212-
// SAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
213-
// SAFE-NEXT: ret half [[TMP1]]
194+
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), half 0xH3C00 seq_cst, align 2
195+
// SAFE-NEXT: ret half [[TMP0]]
214196
//
215197
// UNSAFE-LABEL: define dso_local half @test__Float16_post_inc(
216198
// UNSAFE-SAME: ) #[[ATTR0]] {
217199
// UNSAFE-NEXT: [[ENTRY:.*:]]
218200
// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
219201
// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
220-
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
221-
// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2
222-
// UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
223-
// UNSAFE-NEXT: ret half [[TMP1]]
202+
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]]
203+
// UNSAFE-NEXT: ret half [[TMP0]]
224204
//
225205
_Float16 test__Float16_post_inc()
226206
{
@@ -233,20 +213,16 @@ _Float16 test__Float16_post_inc()
233213
// SAFE-NEXT: [[ENTRY:.*:]]
234214
// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
235215
// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
236-
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 2
237-
// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2
238-
// SAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
239-
// SAFE-NEXT: ret half [[TMP1]]
216+
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), half 0xH3C00 seq_cst, align 2
217+
// SAFE-NEXT: ret half [[TMP0]]
240218
//
241219
// UNSAFE-LABEL: define dso_local half @test__Float16_post_dc(
242220
// UNSAFE-SAME: ) #[[ATTR0]] {
243221
// UNSAFE-NEXT: [[ENTRY:.*:]]
244222
// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
245223
// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
246-
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]]
247-
// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2
248-
// UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
249-
// UNSAFE-NEXT: ret half [[TMP1]]
224+
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]]
225+
// UNSAFE-NEXT: ret half [[TMP0]]
250226
//
251227
_Float16 test__Float16_post_dc()
252228
{
@@ -259,22 +235,18 @@ _Float16 test__Float16_post_dc()
259235
// SAFE-NEXT: [[ENTRY:.*:]]
260236
// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
261237
// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
262-
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 2
263-
// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
264-
// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2
265-
// SAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
266-
// SAFE-NEXT: ret half [[TMP2]]
238+
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), half 0xH3C00 seq_cst, align 2
239+
// SAFE-NEXT: [[TMP1:%.*]] = fsub half [[TMP0]], 0xH3C00
240+
// SAFE-NEXT: ret half [[TMP1]]
267241
//
268242
// UNSAFE-LABEL: define dso_local half @test__Float16_pre_dc(
269243
// UNSAFE-SAME: ) #[[ATTR0]] {
270244
// UNSAFE-NEXT: [[ENTRY:.*:]]
271245
// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
272246
// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
273-
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]]
274-
// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
275-
// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2
276-
// UNSAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
277-
// UNSAFE-NEXT: ret half [[TMP2]]
247+
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_dc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]]
248+
// UNSAFE-NEXT: [[TMP1:%.*]] = fsub half [[TMP0]], 0xH3C00
249+
// UNSAFE-NEXT: ret half [[TMP1]]
278250
//
279251
_Float16 test__Float16_pre_dc()
280252
{
@@ -287,22 +259,18 @@ _Float16 test__Float16_pre_dc()
287259
// SAFE-NEXT: [[ENTRY:.*:]]
288260
// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
289261
// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
290-
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 2
291-
// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
292-
// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2
293-
// SAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
294-
// SAFE-NEXT: ret half [[TMP2]]
262+
// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), half 0xH3C00 seq_cst, align 2
263+
// SAFE-NEXT: [[TMP1:%.*]] = fadd half [[TMP0]], 0xH3C00
264+
// SAFE-NEXT: ret half [[TMP1]]
295265
//
296266
// UNSAFE-LABEL: define dso_local half @test__Float16_pre_inc(
297267
// UNSAFE-SAME: ) #[[ATTR0]] {
298268
// UNSAFE-NEXT: [[ENTRY:.*:]]
299269
// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
300270
// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
301-
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
302-
// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
303-
// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 2
304-
// UNSAFE-NEXT: [[TMP2:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
305-
// UNSAFE-NEXT: ret half [[TMP2]]
271+
// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_pre_inc.n to ptr), half 0xH3C00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]]
272+
// UNSAFE-NEXT: [[TMP1:%.*]] = fadd half [[TMP0]], 0xH3C00
273+
// UNSAFE-NEXT: ret half [[TMP1]]
306274
//
307275
_Float16 test__Float16_pre_inc()
308276
{

0 commit comments

Comments (0)