Skip to content

Commit 83c7784

Browse files
authored
[AArch64] Don't emit Neon in streaming[-compatible] functions with -fzero-call-used-regs (llvm#116995)
Previously, with `-fzero-call-used-regs`, clang/LLVM would incorrectly emit Neon instructions in streaming functions and in streaming-compatible functions without SVE. With this change:
* In streaming functions, Z/P registers will be zeroed.
* In streaming-compatible functions without SVE, D registers will be zeroed (as Neon vector instructions, including `movi v..`, are illegal there).
1 parent 5bdee35 commit 83c7784

File tree

3 files changed

+361
-194
lines changed

3 files changed

+361
-194
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1010,7 +1010,7 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
10101010

10111011
BitVector GPRsToZero(TRI.getNumRegs());
10121012
BitVector FPRsToZero(TRI.getNumRegs());
1013-
bool HasSVE = STI.hasSVE();
1013+
bool HasSVE = STI.isSVEorStreamingSVEAvailable();
10141014
for (MCRegister Reg : RegsToZero.set_bits()) {
10151015
if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10161016
// For GPRs, we only care to clear out the 64-bit register.

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9700,13 +9700,20 @@ void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
97009700

97019701
if (TRI.isGeneralPurposeRegister(MF, Reg)) {
97029702
BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
9703-
} else if (STI.hasSVE()) {
9703+
} else if (STI.isSVEorStreamingSVEAvailable()) {
97049704
BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
97059705
.addImm(0)
97069706
.addImm(0);
9707-
} else {
9707+
} else if (STI.isNeonAvailable()) {
97089708
BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
97099709
.addImm(0);
9710+
} else {
9711+
// This is a streaming-compatible function without SVE. We don't have full
9712+
// Neon (just FPRs), so we can at most use the first 64-bit sub-register.
9713+
// Since `movi v..` would be illegal here, use `fmov d..` instead.
9714+
assert(STI.hasNEON() && "Expected to have NEON.");
9715+
Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
9716+
BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
97109717
}
97119718
}
97129719

0 commit comments

Comments
 (0)