Skip to content

Commit 9cf6867

Browse files
authored
[ARM] Fix failure to register-allocate CMP_SWAP_64 pseudo-inst (#106721)
This test case was failing to compile with a "ran out of registers during register allocation" error at -O0. This was because CMP_SWAP_64 has 3 operands which must each be an even-odd register pair, and two other GPR operands. All of the def operands are also early-clobber, so registers can't be shared between uses and defs. Because the function has an over-aligned alloca it needs frame and base pointers, so r6 and r11 are both reserved. That leaves r0/r1, r2/r3, r4/r5 and r8/r9 as the only valid register pairs, and if the two individual GPR operands happen to get allocated to registers in different pairs then only 2 pairs will be available for the three GPRPair operands. To fix this, I've merged the two GPR operands into a single GPRPair operand. This means that the instruction now has 4 GPRPair operands, which can always be allocated without relying on luck. This does constrain register allocation a bit more, but this pseudo-instruction is only used at -O0, so I don't think that's a problem.
1 parent 30cc198 commit 9cf6867

File tree

9 files changed

+832
-566
lines changed

9 files changed

+832
-566
lines changed

llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1942,11 +1942,14 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
19421942
MachineInstr &MI = *MBBI;
19431943
DebugLoc DL = MI.getDebugLoc();
19441944
MachineOperand &Dest = MI.getOperand(0);
1945-
Register TempReg = MI.getOperand(1).getReg();
19461945
// Duplicating undef operands into 2 instructions does not guarantee the same
19471946
// value on both; however, undef should be replaced by xzr anyway.
1948-
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
1949-
Register AddrReg = MI.getOperand(2).getReg();
1947+
assert(!MI.getOperand(1).isUndef() && "cannot handle undef");
1948+
Register AddrAndTempReg = MI.getOperand(1).getReg();
1949+
Register AddrReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_0);
1950+
Register TempReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_1);
1951+
assert(MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
1952+
"tied operands have different registers");
19501953
Register DesiredReg = MI.getOperand(3).getReg();
19511954
MachineOperand New = MI.getOperand(4);
19521955
New.setIsKill(false);

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10479,33 +10479,42 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
1047910479
Results.push_back(Cycles32.getValue(1));
1048010480
}
1048110481

10482-
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
10483-
SDLoc dl(V.getNode());
10484-
auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10485-
bool isBigEndian = DAG.getDataLayout().isBigEndian();
10486-
if (isBigEndian)
10487-
std::swap (VLo, VHi);
10482+
static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0,
10483+
SDValue V1) {
10484+
SDLoc dl(V0.getNode());
1048810485
SDValue RegClass =
1048910486
DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1049010487
SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1049110488
SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10492-
const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
10489+
const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1};
1049310490
return SDValue(
1049410491
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
1049510492
}
1049610493

10494+
static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V) {
10495+
SDLoc dl(V.getNode());
10496+
auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10497+
bool isBigEndian = DAG.getDataLayout().isBigEndian();
10498+
if (isBigEndian)
10499+
std::swap(VLo, VHi);
10500+
return createGPRPairNode2xi32(DAG, VLo, VHi);
10501+
}
10502+
1049710503
static void ReplaceCMP_SWAP_64Results(SDNode *N,
10498-
SmallVectorImpl<SDValue> & Results,
10499-
SelectionDAG &DAG) {
10504+
SmallVectorImpl<SDValue> &Results,
10505+
SelectionDAG &DAG) {
1050010506
assert(N->getValueType(0) == MVT::i64 &&
1050110507
"AtomicCmpSwap on types less than 64 should be legal");
10502-
SDValue Ops[] = {N->getOperand(1),
10503-
createGPRPairNode(DAG, N->getOperand(2)),
10504-
createGPRPairNode(DAG, N->getOperand(3)),
10505-
N->getOperand(0)};
10508+
SDValue Ops[] = {
10509+
createGPRPairNode2xi32(DAG, N->getOperand(1),
10510+
DAG.getUNDEF(MVT::i32)), // pointer, temp
10511+
createGPRPairNodei64(DAG, N->getOperand(2)), // expected
10512+
createGPRPairNodei64(DAG, N->getOperand(3)), // new
10513+
N->getOperand(0), // chain in
10514+
};
1050610515
SDNode *CmpSwap = DAG.getMachineNode(
1050710516
ARM::CMP_SWAP_64, SDLoc(N),
10508-
DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
10517+
DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops);
1050910518

1051010519
MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1051110520
DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

llvm/lib/Target/ARM/ARMInstrInfo.td

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6509,8 +6509,21 @@ def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
65096509
(ins GPR:$addr, GPR:$desired, GPR:$new),
65106510
NoItinerary, []>, Sched<[]>;
65116511

6512-
def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp),
6513-
(ins GPR:$addr, GPRPair:$desired, GPRPair:$new),
6512+
// The addr_temp and addr_temp_out operands are logically a pair of GPR
6513+
// operands:
6514+
// * addr is an input, holding the address to swap.
6515+
// * temp is an earlyclobber output, used internally in the expansion of the
6516+
// pseudo-inst.
6517+
// These are combined into one GPRPair operand to ensure that register
6518+
// allocation always succeeds. In the worst case there are only 4 GPRPair
6519+
// registers available, of which this instruction needs 3 for the other
6520+
// operands. If these operands weren't combined they would also use two GPR
6521+
// registers, which could overlap with two different GPRPairs, causing
6522+
// allocation to fail. With them combined, we need to allocate 4 GPRPairs,
6523+
// which will always succeed.
6524+
let Constraints = "@earlyclobber $Rd,$addr_temp_out = $addr_temp" in
6525+
def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPRPair:$addr_temp_out),
6526+
(ins GPRPair:$addr_temp, GPRPair:$desired, GPRPair:$new),
65146527
NoItinerary, []>, Sched<[]>;
65156528
}
65166529

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=armv7-none-eabi -O0 | FileCheck %s --check-prefix=CHECK --check-prefix=LE
3+
; RUN: llc < %s -mtriple=armv7eb-none-eabi -O0 | FileCheck %s --check-prefix=CHECK --check-prefix=BE
4+
5+
;; Previously, this failed during register allocation because the CMP_SWAP_64
6+
;; pseudo-instruction has a lot of operands, many of which need to be even-odd
7+
;; register pairs, and the over-aligned alloca in this function causes both a
8+
;; frame pointer and a base pointer to be needed.
9+
10+
define void @test(ptr %ptr) {
11+
; CHECK-LABEL: test:
12+
; CHECK: @ %bb.0: @ %entry
13+
; CHECK-NEXT: .save {r4, r5, r6, r8, r9, r10, r11, lr}
14+
; CHECK-NEXT: push {r4, r5, r6, r8, r9, r10, r11, lr}
15+
; CHECK-NEXT: .setfp r11, sp, #24
16+
; CHECK-NEXT: add r11, sp, #24
17+
; CHECK-NEXT: .pad #32
18+
; CHECK-NEXT: sub sp, sp, #32
19+
; CHECK-NEXT: bfc sp, #0, #4
20+
; CHECK-NEXT: mov r6, sp
21+
; CHECK-NEXT: str r0, [r6, #28] @ 4-byte Spill
22+
; CHECK-NEXT: b .LBB0_1
23+
; CHECK-NEXT: .LBB0_1: @ %block1
24+
; CHECK-NEXT: ldr r0, [r6, #28] @ 4-byte Reload
25+
; CHECK-NEXT: mov r1, sp
26+
; CHECK-NEXT: sub r1, r1, #16
27+
; CHECK-NEXT: bic r1, r1, #15
28+
; CHECK-NEXT: mov sp, r1
29+
; CHECK-NEXT: dmb ish
30+
; CHECK-NEXT: ldr r1, [r0]
31+
; CHECK-NEXT: ldr r0, [r0, #4]
32+
; CHECK-NEXT: str r1, [r6, #20] @ 4-byte Spill
33+
; CHECK-NEXT: str r0, [r6, #24] @ 4-byte Spill
34+
; CHECK-NEXT: b .LBB0_2
35+
; CHECK-NEXT: .LBB0_2: @ %atomicrmw.start
36+
; CHECK-NEXT: @ =>This Loop Header: Depth=1
37+
; CHECK-NEXT: @ Child Loop BB0_3 Depth 2
38+
; CHECK-NEXT: ldr r2, [r6, #24] @ 4-byte Reload
39+
; CHECK-NEXT: ldr r0, [r6, #20] @ 4-byte Reload
40+
; CHECK-NEXT: ldr r8, [r6, #28] @ 4-byte Reload
41+
; LE-NEXT: str r2, [r6, #16] @ 4-byte Spill
42+
; LE-NEXT: str r0, [r6, #12] @ 4-byte Spill
43+
; BE-NEXT: str r2, [r6, #12] @ 4-byte Spill
44+
; BE-NEXT: str r0, [r6, #16] @ 4-byte Spill
45+
; CHECK-NEXT: @ implicit-def: $r1
46+
; CHECK-NEXT: @ implicit-def: $r3
47+
; CHECK-NEXT: @ kill: def $r8 killed $r8 def $r8_r9
48+
; CHECK-NEXT: mov r9, r1
49+
; CHECK-NEXT: @ kill: def $r0 killed $r0 def $r0_r1
50+
; CHECK-NEXT: mov r1, r2
51+
; CHECK-NEXT: mov r12, #0
52+
; CHECK-NEXT: mov r2, r12
53+
; CHECK-NEXT: mov r3, r12
54+
; CHECK-NEXT: .LBB0_3: @ %atomicrmw.start
55+
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
56+
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
57+
; CHECK-NEXT: ldrexd r4, r5, [r8]
58+
; CHECK-NEXT: cmp r4, r0
59+
; CHECK-NEXT: cmpeq r5, r1
60+
; CHECK-NEXT: bne .LBB0_5
61+
; CHECK-NEXT: @ %bb.4: @ %atomicrmw.start
62+
; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=2
63+
; CHECK-NEXT: strexd r9, r2, r3, [r8]
64+
; CHECK-NEXT: cmp r9, #0
65+
; CHECK-NEXT: bne .LBB0_3
66+
; CHECK-NEXT: .LBB0_5: @ %atomicrmw.start
67+
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
68+
; CHECK-NEXT: ldr r2, [r6, #12] @ 4-byte Reload
69+
; LE-NEXT: ldr r1, [r6, #16] @ 4-byte Reload
70+
; LE-NEXT: mov r0, r5
71+
; LE-NEXT: eor r3, r0, r1
72+
; LE-NEXT: mov r1, r4
73+
; LE-NEXT: eor r2, r1, r2
74+
; BE-NEXT: ldr r0, [r6, #16] @ 4-byte Reload
75+
; BE-NEXT: mov r1, r4
76+
; BE-NEXT: eor r3, r1, r0
77+
; BE-NEXT: mov r0, r5
78+
; BE-NEXT: eor r2, r0, r2
79+
; CHECK-NEXT: orr r2, r2, r3
80+
; CHECK-NEXT: cmp r2, #0
81+
; CHECK-NEXT: str r1, [r6, #20] @ 4-byte Spill
82+
; CHECK-NEXT: str r0, [r6, #24] @ 4-byte Spill
83+
; CHECK-NEXT: bne .LBB0_2
84+
; CHECK-NEXT: b .LBB0_6
85+
; CHECK-NEXT: .LBB0_6: @ %atomicrmw.end
86+
; CHECK-NEXT: dmb ish
87+
; CHECK-NEXT: sub sp, r11, #24
88+
; CHECK-NEXT: pop {r4, r5, r6, r8, r9, r10, r11, pc}
89+
entry:
90+
br label %block1
91+
92+
block1:
93+
%stuff = alloca i8, i64 16, align 16
94+
store atomic i64 0, ptr %ptr seq_cst, align 8
95+
ret void
96+
}

llvm/test/CodeGen/ARM/atomic-load-store.ll

Lines changed: 54 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -327,50 +327,56 @@ define void @test_old_store_64bit(ptr %p, i64 %v) {
327327
; ARMOPTNONE-NEXT: push {r4, r5, r7, lr}
328328
; ARMOPTNONE-NEXT: add r7, sp, #8
329329
; ARMOPTNONE-NEXT: push {r8, r10, r11}
330-
; ARMOPTNONE-NEXT: sub sp, sp, #20
331-
; ARMOPTNONE-NEXT: str r0, [sp] @ 4-byte Spill
332-
; ARMOPTNONE-NEXT: str r2, [sp, #4] @ 4-byte Spill
333-
; ARMOPTNONE-NEXT: str r1, [sp, #8] @ 4-byte Spill
330+
; ARMOPTNONE-NEXT: sub sp, sp, #24
331+
; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill
332+
; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill
333+
; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
334334
; ARMOPTNONE-NEXT: dmb ish
335335
; ARMOPTNONE-NEXT: ldr r1, [r0]
336336
; ARMOPTNONE-NEXT: ldr r0, [r0, #4]
337-
; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
338-
; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill
337+
; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
338+
; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
339339
; ARMOPTNONE-NEXT: b LBB5_1
340340
; ARMOPTNONE-NEXT: LBB5_1: @ %atomicrmw.start
341341
; ARMOPTNONE-NEXT: @ =>This Loop Header: Depth=1
342342
; ARMOPTNONE-NEXT: @ Child Loop BB5_2 Depth 2
343-
; ARMOPTNONE-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
344-
; ARMOPTNONE-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
345-
; ARMOPTNONE-NEXT: ldr r3, [sp] @ 4-byte Reload
346-
; ARMOPTNONE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
347-
; ARMOPTNONE-NEXT: ldr r10, [sp, #8] @ 4-byte Reload
348-
; ARMOPTNONE-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
349-
; ARMOPTNONE-NEXT: mov r11, r0
350-
; ARMOPTNONE-NEXT: mov r8, r2
343+
; ARMOPTNONE-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
344+
; ARMOPTNONE-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
345+
; ARMOPTNONE-NEXT: ldr r12, [sp, #8] @ 4-byte Reload
346+
; ARMOPTNONE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
347+
; ARMOPTNONE-NEXT: ldr r8, [sp, #4] @ 4-byte Reload
348+
; ARMOPTNONE-NEXT: str r3, [sp] @ 4-byte Spill
349+
; ARMOPTNONE-NEXT: @ implicit-def: $r1
350+
; ARMOPTNONE-NEXT: @ implicit-def: $r9
351+
; ARMOPTNONE-NEXT: @ kill: def $r8 killed $r8 def $r8_r9
351352
; ARMOPTNONE-NEXT: mov r9, r1
353+
; ARMOPTNONE-NEXT: @ kill: def $r0 killed $r0 def $r0_r1
354+
; ARMOPTNONE-NEXT: mov r1, r12
355+
; ARMOPTNONE-NEXT: mov r10, r2
356+
; ARMOPTNONE-NEXT: mov r11, r3
352357
; ARMOPTNONE-NEXT: LBB5_2: @ %atomicrmw.start
353358
; ARMOPTNONE-NEXT: @ Parent Loop BB5_1 Depth=1
354359
; ARMOPTNONE-NEXT: @ => This Inner Loop Header: Depth=2
355-
; ARMOPTNONE-NEXT: ldrexd r4, r5, [r3]
356-
; ARMOPTNONE-NEXT: cmp r4, r8
357-
; ARMOPTNONE-NEXT: cmpeq r5, r9
360+
; ARMOPTNONE-NEXT: ldrexd r4, r5, [r8]
361+
; ARMOPTNONE-NEXT: cmp r4, r10
362+
; ARMOPTNONE-NEXT: cmpeq r5, r11
358363
; ARMOPTNONE-NEXT: bne LBB5_4
359364
; ARMOPTNONE-NEXT: @ %bb.3: @ %atomicrmw.start
360365
; ARMOPTNONE-NEXT: @ in Loop: Header=BB5_2 Depth=2
361-
; ARMOPTNONE-NEXT: strexd r0, r10, r11, [r3]
362-
; ARMOPTNONE-NEXT: cmp r0, #0
366+
; ARMOPTNONE-NEXT: strexd r9, r0, r1, [r8]
367+
; ARMOPTNONE-NEXT: cmp r9, #0
363368
; ARMOPTNONE-NEXT: bne LBB5_2
364369
; ARMOPTNONE-NEXT: LBB5_4: @ %atomicrmw.start
365370
; ARMOPTNONE-NEXT: @ in Loop: Header=BB5_1 Depth=1
371+
; ARMOPTNONE-NEXT: ldr r1, [sp] @ 4-byte Reload
366372
; ARMOPTNONE-NEXT: mov r0, r5
367373
; ARMOPTNONE-NEXT: eor r3, r0, r1
368374
; ARMOPTNONE-NEXT: mov r1, r4
369375
; ARMOPTNONE-NEXT: eor r2, r1, r2
370376
; ARMOPTNONE-NEXT: orr r2, r2, r3
371377
; ARMOPTNONE-NEXT: cmp r2, #0
372-
; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
373-
; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill
378+
; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
379+
; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
374380
; ARMOPTNONE-NEXT: bne LBB5_1
375381
; ARMOPTNONE-NEXT: b LBB5_5
376382
; ARMOPTNONE-NEXT: LBB5_5: @ %atomicrmw.end
@@ -861,52 +867,58 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
861867
; ARMOPTNONE-NEXT: push {r4, r5, r7, lr}
862868
; ARMOPTNONE-NEXT: add r7, sp, #8
863869
; ARMOPTNONE-NEXT: push {r8, r10, r11}
864-
; ARMOPTNONE-NEXT: sub sp, sp, #20
865-
; ARMOPTNONE-NEXT: str r0, [sp] @ 4-byte Spill
870+
; ARMOPTNONE-NEXT: sub sp, sp, #24
871+
; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill
866872
; ARMOPTNONE-NEXT: vmov d16, r1, r2
867873
; ARMOPTNONE-NEXT: vmov r1, r2, d16
868-
; ARMOPTNONE-NEXT: str r2, [sp, #4] @ 4-byte Spill
869-
; ARMOPTNONE-NEXT: str r1, [sp, #8] @ 4-byte Spill
874+
; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill
875+
; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
870876
; ARMOPTNONE-NEXT: dmb ish
871877
; ARMOPTNONE-NEXT: ldr r1, [r0]
872878
; ARMOPTNONE-NEXT: ldr r0, [r0, #4]
873-
; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
874-
; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill
879+
; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
880+
; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
875881
; ARMOPTNONE-NEXT: b LBB13_1
876882
; ARMOPTNONE-NEXT: LBB13_1: @ %atomicrmw.start
877883
; ARMOPTNONE-NEXT: @ =>This Loop Header: Depth=1
878884
; ARMOPTNONE-NEXT: @ Child Loop BB13_2 Depth 2
879-
; ARMOPTNONE-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
880-
; ARMOPTNONE-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
881-
; ARMOPTNONE-NEXT: ldr r3, [sp] @ 4-byte Reload
882-
; ARMOPTNONE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
883-
; ARMOPTNONE-NEXT: ldr r10, [sp, #8] @ 4-byte Reload
884-
; ARMOPTNONE-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
885-
; ARMOPTNONE-NEXT: mov r11, r0
886-
; ARMOPTNONE-NEXT: mov r8, r2
885+
; ARMOPTNONE-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
886+
; ARMOPTNONE-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
887+
; ARMOPTNONE-NEXT: ldr r12, [sp, #8] @ 4-byte Reload
888+
; ARMOPTNONE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
889+
; ARMOPTNONE-NEXT: ldr r8, [sp, #4] @ 4-byte Reload
890+
; ARMOPTNONE-NEXT: str r3, [sp] @ 4-byte Spill
891+
; ARMOPTNONE-NEXT: @ implicit-def: $r1
892+
; ARMOPTNONE-NEXT: @ implicit-def: $r9
893+
; ARMOPTNONE-NEXT: @ kill: def $r8 killed $r8 def $r8_r9
887894
; ARMOPTNONE-NEXT: mov r9, r1
895+
; ARMOPTNONE-NEXT: @ kill: def $r0 killed $r0 def $r0_r1
896+
; ARMOPTNONE-NEXT: mov r1, r12
897+
; ARMOPTNONE-NEXT: mov r10, r2
898+
; ARMOPTNONE-NEXT: mov r11, r3
888899
; ARMOPTNONE-NEXT: LBB13_2: @ %atomicrmw.start
889900
; ARMOPTNONE-NEXT: @ Parent Loop BB13_1 Depth=1
890901
; ARMOPTNONE-NEXT: @ => This Inner Loop Header: Depth=2
891-
; ARMOPTNONE-NEXT: ldrexd r4, r5, [r3]
892-
; ARMOPTNONE-NEXT: cmp r4, r8
893-
; ARMOPTNONE-NEXT: cmpeq r5, r9
902+
; ARMOPTNONE-NEXT: ldrexd r4, r5, [r8]
903+
; ARMOPTNONE-NEXT: cmp r4, r10
904+
; ARMOPTNONE-NEXT: cmpeq r5, r11
894905
; ARMOPTNONE-NEXT: bne LBB13_4
895906
; ARMOPTNONE-NEXT: @ %bb.3: @ %atomicrmw.start
896907
; ARMOPTNONE-NEXT: @ in Loop: Header=BB13_2 Depth=2
897-
; ARMOPTNONE-NEXT: strexd r0, r10, r11, [r3]
898-
; ARMOPTNONE-NEXT: cmp r0, #0
908+
; ARMOPTNONE-NEXT: strexd r9, r0, r1, [r8]
909+
; ARMOPTNONE-NEXT: cmp r9, #0
899910
; ARMOPTNONE-NEXT: bne LBB13_2
900911
; ARMOPTNONE-NEXT: LBB13_4: @ %atomicrmw.start
901912
; ARMOPTNONE-NEXT: @ in Loop: Header=BB13_1 Depth=1
913+
; ARMOPTNONE-NEXT: ldr r1, [sp] @ 4-byte Reload
902914
; ARMOPTNONE-NEXT: mov r0, r5
903915
; ARMOPTNONE-NEXT: eor r3, r0, r1
904916
; ARMOPTNONE-NEXT: mov r1, r4
905917
; ARMOPTNONE-NEXT: eor r2, r1, r2
906918
; ARMOPTNONE-NEXT: orr r2, r2, r3
907919
; ARMOPTNONE-NEXT: cmp r2, #0
908-
; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
909-
; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill
920+
; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
921+
; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
910922
; ARMOPTNONE-NEXT: bne LBB13_1
911923
; ARMOPTNONE-NEXT: b LBB13_5
912924
; ARMOPTNONE-NEXT: LBB13_5: @ %atomicrmw.end

0 commit comments

Comments
 (0)