Skip to content

Commit 6417ce4

Browse files
heiher authored and SixWeining committed
[LoongArch] Improve codegen for i8/i16 'atomicrmw xchg a, {0,-1}'
Similar to D156801 for RISCV. Link: rust-lang/rust#114034 Link: #64090 Reviewed By: SixWeining, xen0n Differential Revision: https://reviews.llvm.org/D159252
1 parent 555e239 commit 6417ce4

File tree

2 files changed

+40
-116
lines changed

2 files changed

+40
-116
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4184,6 +4184,22 @@ Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
41844184
Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
41854185
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
41864186
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
4187+
// In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
4188+
// the atomic instruction with an AtomicRMWInst::And/Or with appropriate
4189+
// mask, as this produces better code than the LL/SC loop emitted by
4190+
// int_loongarch_masked_atomicrmw_xchg.
4191+
if (AI->getOperation() == AtomicRMWInst::Xchg &&
4192+
isa<ConstantInt>(AI->getValOperand())) {
4193+
ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
4194+
if (CVal->isZero())
4195+
return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
4196+
Builder.CreateNot(Mask, "Inv_Mask"),
4197+
AI->getAlign(), Ord);
4198+
if (CVal->isMinusOne())
4199+
return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
4200+
AI->getAlign(), Ord);
4201+
}
4202+
41874203
unsigned GRLen = Subtarget.getGRLen();
41884204
Value *Ordering =
41894205
Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));

llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll

Lines changed: 24 additions & 116 deletions
Original file line number | Diff line number | Diff line change
@@ -56,14 +56,12 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
5656
; LA32-NEXT: slli.w $a1, $a0, 3
5757
; LA32-NEXT: ori $a2, $zero, 255
5858
; LA32-NEXT: sll.w $a2, $a2, $a1
59+
; LA32-NEXT: nor $a2, $a2, $zero
5960
; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
6061
; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
6162
; LA32-NEXT: dbar 0
6263
; LA32-NEXT: ll.w $a3, $a0, 0
63-
; LA32-NEXT: addi.w $a4, $zero, 0
64-
; LA32-NEXT: xor $a4, $a3, $a4
65-
; LA32-NEXT: and $a4, $a4, $a2
66-
; LA32-NEXT: xor $a4, $a3, $a4
64+
; LA32-NEXT: and $a4, $a3, $a2
6765
; LA32-NEXT: sc.w $a4, $a0, 0
6866
; LA32-NEXT: beqz $a4, .LBB1_1
6967
; LA32-NEXT: # %bb.2:
@@ -75,18 +73,9 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
7573
; LA64-NEXT: slli.d $a1, $a0, 3
7674
; LA64-NEXT: ori $a2, $zero, 255
7775
; LA64-NEXT: sll.w $a2, $a2, $a1
78-
; LA64-NEXT: addi.w $a2, $a2, 0
76+
; LA64-NEXT: nor $a2, $a2, $zero
7977
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
80-
; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
81-
; LA64-NEXT: dbar 0
82-
; LA64-NEXT: ll.w $a3, $a0, 0
83-
; LA64-NEXT: addi.w $a4, $zero, 0
84-
; LA64-NEXT: xor $a4, $a3, $a4
85-
; LA64-NEXT: and $a4, $a4, $a2
86-
; LA64-NEXT: xor $a4, $a3, $a4
87-
; LA64-NEXT: sc.w $a4, $a0, 0
88-
; LA64-NEXT: beqz $a4, .LBB1_1
89-
; LA64-NEXT: # %bb.2:
78+
; LA64-NEXT: amand_db.w $a3, $a2, $a0
9079
; LA64-NEXT: srl.w $a0, $a3, $a1
9180
; LA64-NEXT: ret
9281
%1 = atomicrmw xchg ptr %a, i8 0 acquire
@@ -103,10 +92,7 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
10392
; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
10493
; LA32-NEXT: dbar 0
10594
; LA32-NEXT: ll.w $a3, $a0, 0
106-
; LA32-NEXT: addi.w $a4, $a2, 0
107-
; LA32-NEXT: xor $a4, $a3, $a4
108-
; LA32-NEXT: and $a4, $a4, $a2
109-
; LA32-NEXT: xor $a4, $a3, $a4
95+
; LA32-NEXT: or $a4, $a3, $a2
11096
; LA32-NEXT: sc.w $a4, $a0, 0
11197
; LA32-NEXT: beqz $a4, .LBB2_1
11298
; LA32-NEXT: # %bb.2:
@@ -118,18 +104,8 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
118104
; LA64-NEXT: slli.d $a1, $a0, 3
119105
; LA64-NEXT: ori $a2, $zero, 255
120106
; LA64-NEXT: sll.w $a2, $a2, $a1
121-
; LA64-NEXT: addi.w $a2, $a2, 0
122107
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
123-
; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
124-
; LA64-NEXT: dbar 0
125-
; LA64-NEXT: ll.w $a3, $a0, 0
126-
; LA64-NEXT: addi.w $a4, $a2, 0
127-
; LA64-NEXT: xor $a4, $a3, $a4
128-
; LA64-NEXT: and $a4, $a4, $a2
129-
; LA64-NEXT: xor $a4, $a3, $a4
130-
; LA64-NEXT: sc.w $a4, $a0, 0
131-
; LA64-NEXT: beqz $a4, .LBB2_1
132-
; LA64-NEXT: # %bb.2:
108+
; LA64-NEXT: amor_db.w $a3, $a2, $a0
133109
; LA64-NEXT: srl.w $a0, $a3, $a1
134110
; LA64-NEXT: ret
135111
%1 = atomicrmw xchg ptr %a, i8 -1 acquire
@@ -193,14 +169,12 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
193169
; LA32-NEXT: ori $a1, $a1, 4095
194170
; LA32-NEXT: slli.w $a2, $a0, 3
195171
; LA32-NEXT: sll.w $a1, $a1, $a2
172+
; LA32-NEXT: nor $a1, $a1, $zero
196173
; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
197174
; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
198175
; LA32-NEXT: dbar 0
199176
; LA32-NEXT: ll.w $a3, $a0, 0
200-
; LA32-NEXT: addi.w $a4, $zero, 0
201-
; LA32-NEXT: xor $a4, $a3, $a4
202-
; LA32-NEXT: and $a4, $a4, $a1
203-
; LA32-NEXT: xor $a4, $a3, $a4
177+
; LA32-NEXT: and $a4, $a3, $a1
204178
; LA32-NEXT: sc.w $a4, $a0, 0
205179
; LA32-NEXT: beqz $a4, .LBB4_1
206180
; LA32-NEXT: # %bb.2:
@@ -213,18 +187,9 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
213187
; LA64-NEXT: ori $a1, $a1, 4095
214188
; LA64-NEXT: slli.d $a2, $a0, 3
215189
; LA64-NEXT: sll.w $a1, $a1, $a2
216-
; LA64-NEXT: addi.w $a1, $a1, 0
190+
; LA64-NEXT: nor $a1, $a1, $zero
217191
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
218-
; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
219-
; LA64-NEXT: dbar 0
220-
; LA64-NEXT: ll.w $a3, $a0, 0
221-
; LA64-NEXT: addi.w $a4, $zero, 0
222-
; LA64-NEXT: xor $a4, $a3, $a4
223-
; LA64-NEXT: and $a4, $a4, $a1
224-
; LA64-NEXT: xor $a4, $a3, $a4
225-
; LA64-NEXT: sc.w $a4, $a0, 0
226-
; LA64-NEXT: beqz $a4, .LBB4_1
227-
; LA64-NEXT: # %bb.2:
192+
; LA64-NEXT: amand_db.w $a3, $a1, $a0
228193
; LA64-NEXT: srl.w $a0, $a3, $a2
229194
; LA64-NEXT: ret
230195
%1 = atomicrmw xchg ptr %a, i16 0 acquire
@@ -242,10 +207,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
242207
; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
243208
; LA32-NEXT: dbar 0
244209
; LA32-NEXT: ll.w $a3, $a0, 0
245-
; LA32-NEXT: addi.w $a4, $a1, 0
246-
; LA32-NEXT: xor $a4, $a3, $a4
247-
; LA32-NEXT: and $a4, $a4, $a1
248-
; LA32-NEXT: xor $a4, $a3, $a4
210+
; LA32-NEXT: or $a4, $a3, $a1
249211
; LA32-NEXT: sc.w $a4, $a0, 0
250212
; LA32-NEXT: beqz $a4, .LBB5_1
251213
; LA32-NEXT: # %bb.2:
@@ -258,18 +220,8 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
258220
; LA64-NEXT: ori $a1, $a1, 4095
259221
; LA64-NEXT: slli.d $a2, $a0, 3
260222
; LA64-NEXT: sll.w $a1, $a1, $a2
261-
; LA64-NEXT: addi.w $a1, $a1, 0
262223
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
263-
; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
264-
; LA64-NEXT: dbar 0
265-
; LA64-NEXT: ll.w $a3, $a0, 0
266-
; LA64-NEXT: addi.w $a4, $a1, 0
267-
; LA64-NEXT: xor $a4, $a3, $a4
268-
; LA64-NEXT: and $a4, $a4, $a1
269-
; LA64-NEXT: xor $a4, $a3, $a4
270-
; LA64-NEXT: sc.w $a4, $a0, 0
271-
; LA64-NEXT: beqz $a4, .LBB5_1
272-
; LA64-NEXT: # %bb.2:
224+
; LA64-NEXT: amor_db.w $a3, $a1, $a0
273225
; LA64-NEXT: srl.w $a0, $a3, $a2
274226
; LA64-NEXT: ret
275227
%1 = atomicrmw xchg ptr %a, i16 -1 acquire
@@ -1131,13 +1083,11 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
11311083
; LA32-NEXT: slli.w $a1, $a0, 3
11321084
; LA32-NEXT: ori $a2, $zero, 255
11331085
; LA32-NEXT: sll.w $a2, $a2, $a1
1086+
; LA32-NEXT: nor $a2, $a2, $zero
11341087
; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
11351088
; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
11361089
; LA32-NEXT: ll.w $a3, $a0, 0
1137-
; LA32-NEXT: addi.w $a4, $zero, 0
1138-
; LA32-NEXT: xor $a4, $a3, $a4
1139-
; LA32-NEXT: and $a4, $a4, $a2
1140-
; LA32-NEXT: xor $a4, $a3, $a4
1090+
; LA32-NEXT: and $a4, $a3, $a2
11411091
; LA32-NEXT: sc.w $a4, $a0, 0
11421092
; LA32-NEXT: beqz $a4, .LBB33_1
11431093
; LA32-NEXT: # %bb.2:
@@ -1149,17 +1099,9 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
11491099
; LA64-NEXT: slli.d $a1, $a0, 3
11501100
; LA64-NEXT: ori $a2, $zero, 255
11511101
; LA64-NEXT: sll.w $a2, $a2, $a1
1152-
; LA64-NEXT: addi.w $a2, $a2, 0
1102+
; LA64-NEXT: nor $a2, $a2, $zero
11531103
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
1154-
; LA64-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
1155-
; LA64-NEXT: ll.w $a3, $a0, 0
1156-
; LA64-NEXT: addi.w $a4, $zero, 0
1157-
; LA64-NEXT: xor $a4, $a3, $a4
1158-
; LA64-NEXT: and $a4, $a4, $a2
1159-
; LA64-NEXT: xor $a4, $a3, $a4
1160-
; LA64-NEXT: sc.w $a4, $a0, 0
1161-
; LA64-NEXT: beqz $a4, .LBB33_1
1162-
; LA64-NEXT: # %bb.2:
1104+
; LA64-NEXT: amand_db.w $a3, $a2, $a0
11631105
; LA64-NEXT: srl.w $a0, $a3, $a1
11641106
; LA64-NEXT: ret
11651107
%1 = atomicrmw xchg ptr %a, i8 0 monotonic
@@ -1175,10 +1117,7 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
11751117
; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
11761118
; LA32-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
11771119
; LA32-NEXT: ll.w $a3, $a0, 0
1178-
; LA32-NEXT: addi.w $a4, $a2, 0
1179-
; LA32-NEXT: xor $a4, $a3, $a4
1180-
; LA32-NEXT: and $a4, $a4, $a2
1181-
; LA32-NEXT: xor $a4, $a3, $a4
1120+
; LA32-NEXT: or $a4, $a3, $a2
11821121
; LA32-NEXT: sc.w $a4, $a0, 0
11831122
; LA32-NEXT: beqz $a4, .LBB34_1
11841123
; LA32-NEXT: # %bb.2:
@@ -1190,17 +1129,8 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
11901129
; LA64-NEXT: slli.d $a1, $a0, 3
11911130
; LA64-NEXT: ori $a2, $zero, 255
11921131
; LA64-NEXT: sll.w $a2, $a2, $a1
1193-
; LA64-NEXT: addi.w $a2, $a2, 0
11941132
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
1195-
; LA64-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
1196-
; LA64-NEXT: ll.w $a3, $a0, 0
1197-
; LA64-NEXT: addi.w $a4, $a2, 0
1198-
; LA64-NEXT: xor $a4, $a3, $a4
1199-
; LA64-NEXT: and $a4, $a4, $a2
1200-
; LA64-NEXT: xor $a4, $a3, $a4
1201-
; LA64-NEXT: sc.w $a4, $a0, 0
1202-
; LA64-NEXT: beqz $a4, .LBB34_1
1203-
; LA64-NEXT: # %bb.2:
1133+
; LA64-NEXT: amor_db.w $a3, $a2, $a0
12041134
; LA64-NEXT: srl.w $a0, $a3, $a1
12051135
; LA64-NEXT: ret
12061136
%1 = atomicrmw xchg ptr %a, i8 -1 monotonic
@@ -1262,13 +1192,11 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
12621192
; LA32-NEXT: ori $a1, $a1, 4095
12631193
; LA32-NEXT: slli.w $a2, $a0, 3
12641194
; LA32-NEXT: sll.w $a1, $a1, $a2
1195+
; LA32-NEXT: nor $a1, $a1, $zero
12651196
; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
12661197
; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
12671198
; LA32-NEXT: ll.w $a3, $a0, 0
1268-
; LA32-NEXT: addi.w $a4, $zero, 0
1269-
; LA32-NEXT: xor $a4, $a3, $a4
1270-
; LA32-NEXT: and $a4, $a4, $a1
1271-
; LA32-NEXT: xor $a4, $a3, $a4
1199+
; LA32-NEXT: and $a4, $a3, $a1
12721200
; LA32-NEXT: sc.w $a4, $a0, 0
12731201
; LA32-NEXT: beqz $a4, .LBB36_1
12741202
; LA32-NEXT: # %bb.2:
@@ -1281,17 +1209,9 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
12811209
; LA64-NEXT: ori $a1, $a1, 4095
12821210
; LA64-NEXT: slli.d $a2, $a0, 3
12831211
; LA64-NEXT: sll.w $a1, $a1, $a2
1284-
; LA64-NEXT: addi.w $a1, $a1, 0
1212+
; LA64-NEXT: nor $a1, $a1, $zero
12851213
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
1286-
; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
1287-
; LA64-NEXT: ll.w $a3, $a0, 0
1288-
; LA64-NEXT: addi.w $a4, $zero, 0
1289-
; LA64-NEXT: xor $a4, $a3, $a4
1290-
; LA64-NEXT: and $a4, $a4, $a1
1291-
; LA64-NEXT: xor $a4, $a3, $a4
1292-
; LA64-NEXT: sc.w $a4, $a0, 0
1293-
; LA64-NEXT: beqz $a4, .LBB36_1
1294-
; LA64-NEXT: # %bb.2:
1214+
; LA64-NEXT: amand_db.w $a3, $a1, $a0
12951215
; LA64-NEXT: srl.w $a0, $a3, $a2
12961216
; LA64-NEXT: ret
12971217
%1 = atomicrmw xchg ptr %a, i16 0 monotonic
@@ -1308,10 +1228,7 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
13081228
; LA32-NEXT: bstrins.w $a0, $zero, 1, 0
13091229
; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
13101230
; LA32-NEXT: ll.w $a3, $a0, 0
1311-
; LA32-NEXT: addi.w $a4, $a1, 0
1312-
; LA32-NEXT: xor $a4, $a3, $a4
1313-
; LA32-NEXT: and $a4, $a4, $a1
1314-
; LA32-NEXT: xor $a4, $a3, $a4
1231+
; LA32-NEXT: or $a4, $a3, $a1
13151232
; LA32-NEXT: sc.w $a4, $a0, 0
13161233
; LA32-NEXT: beqz $a4, .LBB37_1
13171234
; LA32-NEXT: # %bb.2:
@@ -1324,17 +1241,8 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
13241241
; LA64-NEXT: ori $a1, $a1, 4095
13251242
; LA64-NEXT: slli.d $a2, $a0, 3
13261243
; LA64-NEXT: sll.w $a1, $a1, $a2
1327-
; LA64-NEXT: addi.w $a1, $a1, 0
13281244
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
1329-
; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
1330-
; LA64-NEXT: ll.w $a3, $a0, 0
1331-
; LA64-NEXT: addi.w $a4, $a1, 0
1332-
; LA64-NEXT: xor $a4, $a3, $a4
1333-
; LA64-NEXT: and $a4, $a4, $a1
1334-
; LA64-NEXT: xor $a4, $a3, $a4
1335-
; LA64-NEXT: sc.w $a4, $a0, 0
1336-
; LA64-NEXT: beqz $a4, .LBB37_1
1337-
; LA64-NEXT: # %bb.2:
1245+
; LA64-NEXT: amor_db.w $a3, $a1, $a0
13381246
; LA64-NEXT: srl.w $a0, $a3, $a2
13391247
; LA64-NEXT: ret
13401248
%1 = atomicrmw xchg ptr %a, i16 -1 monotonic

0 commit comments

Comments
 (0)