Skip to content

Commit beb37e2

Browse files
authored
[AArch64] Don't replace dst of SWP instructions with (X|W)ZR (#102139)
This change updates the AArch64DeadRegisterDefinitions pass to ensure it does not replace the destination register of a SWP instruction with the zero register when its value is unused. This is necessary to ensure that the ordering of such instructions in relation to DMB.LD barriers adheres to the definitions of the AArch64 Memory Model. The memory model states the following (ARMARM version DDI 0487K.a §B2.3.7): ``` Barrier-ordered-before An effect E1 is Barrier-ordered-before an effect E2 if one of the following applies: [...] * All of the following apply: - E1 is a Memory Read effect. - E1 is generated by an instruction whose destination register is not WZR or XZR. - E1 appears in program order before E3. - E3 is either a DMB LD effect or a DSB LD effect. - E3 appears in program order before E2. ``` Prior to this change, by replacing the destination register of such SWP instructions with WZR/XZR, the ordering relation described above was incorrectly removed from the generated code. The new behaviour is ensured in this patch by adding the relevant `SWP[L](B|H|W|X)` instructions to the list in the `atomicReadDroppedOnZero` predicate, which already covered the `LD<Op>` instructions that are subject to the same effect. Fixes #68428.
1 parent 3968942 commit beb37e2

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed

llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,10 @@ static bool atomicReadDroppedOnZero(unsigned Opcode) {
108108
case AArch64::LDUMINW: case AArch64::LDUMINX:
109109
case AArch64::LDUMINLB: case AArch64::LDUMINLH:
110110
case AArch64::LDUMINLW: case AArch64::LDUMINLX:
111+
case AArch64::SWPB: case AArch64::SWPH:
112+
case AArch64::SWPW: case AArch64::SWPX:
113+
case AArch64::SWPLB: case AArch64::SWPLH:
114+
case AArch64::SWPLW: case AArch64::SWPLX:
111115
return true;
112116
}
113117
return false;
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse -O0 | FileCheck %s
2+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse -O1 | FileCheck %s
3+
4+
; When their destination register is WZR/XZR, SWP operations are not regarded as
5+
; a read for the purpose of a DMB.LD in the AArch64 memory model.
6+
; This test ensures that the AArch64DeadRegisterDefinitions pass does not
7+
; replace the destination register of SWP instructions with the zero register
8+
; when the read value is unused.
9+
10+
define dso_local i32 @atomic_exchange_monotonic(ptr %ptr, ptr %ptr2, i32 %value) {
11+
; CHECK-LABEL: atomic_exchange_monotonic:
12+
; CHECK: // %bb.0:
13+
; CHECK-NEXT: swp
14+
; CHECK-NOT: wzr
15+
; CHECK-NEXT: dmb ishld
16+
; CHECK-NEXT: ldr w0, [x1]
17+
; CHECK-NEXT: ret
18+
%r0 = atomicrmw xchg ptr %ptr, i32 %value monotonic
19+
fence acquire
20+
%r1 = load atomic i32, ptr %ptr2 monotonic, align 4
21+
ret i32 %r1
22+
}
23+
24+
define dso_local i32 @atomic_exchange_acquire(ptr %ptr, ptr %ptr2, i32 %value) {
25+
; CHECK-LABEL: atomic_exchange_acquire:
26+
; CHECK: // %bb.0:
27+
; CHECK-NEXT: swpa
28+
; CHECK-NOT: wzr
29+
; CHECK-NEXT: dmb ishld
30+
; CHECK-NEXT: ldr w0, [x1]
31+
; CHECK-NEXT: ret
32+
%r0 = atomicrmw xchg ptr %ptr, i32 %value acquire
33+
fence acquire
34+
%r1 = load atomic i32, ptr %ptr2 monotonic, align 4
35+
ret i32 %r1
36+
}
37+
38+
define dso_local i32 @atomic_exchange_release(ptr %ptr, ptr %ptr2, i32 %value) {
39+
; CHECK-LABEL: atomic_exchange_release:
40+
; CHECK: // %bb.0:
41+
; CHECK-NEXT: swpl
42+
; CHECK-NOT: wzr
43+
; CHECK-NEXT: dmb ishld
44+
; CHECK-NEXT: ldr w0, [x1]
45+
; CHECK-NEXT: ret
46+
%r0 = atomicrmw xchg ptr %ptr, i32 %value release
47+
fence acquire
48+
%r1 = load atomic i32, ptr %ptr2 monotonic, align 4
49+
ret i32 %r1
50+
}
51+
52+
define dso_local i32 @atomic_exchange_acquire_release(ptr %ptr, ptr %ptr2, i32 %value) {
53+
; CHECK-LABEL: atomic_exchange_acquire_release:
54+
; CHECK: // %bb.0:
55+
; CHECK-NEXT: swpal
56+
; CHECK-NOT: wzr
57+
; CHECK-NEXT: dmb ishld
58+
; CHECK-NEXT: ldr w0, [x1]
59+
; CHECK-NEXT: ret
60+
%r0 = atomicrmw xchg ptr %ptr, i32 %value acq_rel
61+
fence acquire
62+
%r1 = load atomic i32, ptr %ptr2 monotonic, align 4
63+
ret i32 %r1
64+
}

0 commit comments

Comments
 (0)