Skip to content

Commit b71434f

Browse files
[AArch64] Avoid NEON ORR when NEON and SVE are unavailable (#93940)
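For streaming-compatible functions with only +sme, NEON instructions are not available, so we can't use a NEON ORR (aliased as 'mov') to copy Q-registers; instead, the copy is done as a spill/fill through the stack. This also fixes the fill: it previously used the pre-indexed addressing mode (`ldr q0, [sp, #16]!`), which reads from SP+16 rather than from the slot just written by `str q1, [sp, #-16]!`. It now uses the post-incrementing addressing mode (`ldr q0, [sp], #16`), which loads from SP and then restores SP.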
1 parent ccb73e8 commit b71434f

File tree

4 files changed

+28
-16
lines changed

4 files changed

+28
-16
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4661,7 +4661,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
46614661
.addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
46624662
.addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
46634663
.addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
4664-
else if (Subtarget.hasNEON())
4664+
else if (Subtarget.isNeonAvailable())
46654665
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
46664666
.addReg(SrcReg)
46674667
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -4671,7 +4671,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
46714671
.addReg(SrcReg, getKillRegState(KillSrc))
46724672
.addReg(AArch64::SP)
46734673
.addImm(-16);
4674-
BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
4674+
BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
46754675
.addReg(AArch64::SP, RegState::Define)
46764676
.addReg(DestReg, RegState::Define)
46774677
.addReg(AArch64::SP)
Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,27 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-neon < %s | FileCheck %s
23

34
define float @copy_FPR32(float %a, float %b) {
4-
;CHECK-LABEL: copy_FPR32:
5-
;CHECK: fmov s0, s1
5+
; CHECK-LABEL: copy_FPR32:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: fmov s0, s1
8+
; CHECK-NEXT: ret
69
ret float %b;
710
}
8-
11+
912
define double @copy_FPR64(double %a, double %b) {
10-
;CHECK-LABEL: copy_FPR64:
11-
;CHECK: fmov d0, d1
13+
; CHECK-LABEL: copy_FPR64:
14+
; CHECK: // %bb.0:
15+
; CHECK-NEXT: fmov d0, d1
16+
; CHECK-NEXT: ret
1217
ret double %b;
1318
}
14-
19+
1520
define fp128 @copy_FPR128(fp128 %a, fp128 %b) {
16-
;CHECK-LABEL: copy_FPR128:
17-
;CHECK: str q1, [sp, #-16]!
18-
;CHECK-NEXT: ldr q0, [sp, #16]!
21+
; CHECK-LABEL: copy_FPR128:
22+
; CHECK: // %bb.0:
23+
; CHECK-NEXT: str q1, [sp, #-16]!
24+
; CHECK-NEXT: ldr q0, [sp], #16
25+
; CHECK-NEXT: ret
1926
ret fp128 %b;
2027
}

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -980,7 +980,8 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) {
980980
; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB3_3
981981
; NONEON-NOSVE-NEXT: b .LBB3_4
982982
; NONEON-NOSVE-NEXT: .LBB3_2:
983-
; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b
983+
; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
984+
; NONEON-NOSVE-NEXT: ldr q0, [sp], #16
984985
; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB3_4
985986
; NONEON-NOSVE-NEXT: .LBB3_3: // %cond.load1
986987
; NONEON-NOSVE-NEXT: ldrb w10, [x0, #1]
@@ -2095,7 +2096,8 @@ define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) {
20952096
; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB7_3
20962097
; NONEON-NOSVE-NEXT: b .LBB7_4
20972098
; NONEON-NOSVE-NEXT: .LBB7_2:
2098-
; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b
2099+
; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
2100+
; NONEON-NOSVE-NEXT: ldr q0, [sp], #16
20992101
; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB7_4
21002102
; NONEON-NOSVE-NEXT: .LBB7_3: // %cond.load1
21012103
; NONEON-NOSVE-NEXT: ldr h2, [x0, #2]
@@ -2616,7 +2618,8 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) {
26162618
; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB10_3
26172619
; NONEON-NOSVE-NEXT: b .LBB10_4
26182620
; NONEON-NOSVE-NEXT: .LBB10_2:
2619-
; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b
2621+
; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
2622+
; NONEON-NOSVE-NEXT: ldr q0, [sp], #16
26202623
; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB10_4
26212624
; NONEON-NOSVE-NEXT: .LBB10_3: // %cond.load1
26222625
; NONEON-NOSVE-NEXT: ldr s2, [x0, #4]
@@ -2839,7 +2842,8 @@ define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) {
28392842
; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB12_3
28402843
; NONEON-NOSVE-NEXT: b .LBB12_4
28412844
; NONEON-NOSVE-NEXT: .LBB12_2:
2842-
; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b
2845+
; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
2846+
; NONEON-NOSVE-NEXT: ldr q0, [sp], #16
28432847
; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB12_4
28442848
; NONEON-NOSVE-NEXT: .LBB12_3: // %cond.load1
28452849
; NONEON-NOSVE-NEXT: ldr d2, [x0, #8]

llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ define fp128 @test_streaming_compatible_register_mov(fp128 %q0, fp128 %q1) {
1515
;
1616
; NONEON-NOSVE-LABEL: test_streaming_compatible_register_mov:
1717
; NONEON-NOSVE: // %bb.0:
18-
; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b
18+
; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
19+
; NONEON-NOSVE-NEXT: ldr q0, [sp], #16
1920
; NONEON-NOSVE-NEXT: ret
2021
ret fp128 %q1
2122
}

0 commit comments

Comments
 (0)